This code is for my senior thesis and analyzes field and remotely sensed data. Generative AI (ChatGPT 3.5) assisted in writing this code. Throughout, I use the forest classification names dense and established interchangeably; both denote terra firma forest. Similarly, the riverine forest type is the same as the swamp or flooded forest type. The code is sequential: products from earlier chunks are used in later ones.
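Where the text and the data labels differ, the equivalences can be written down as a small lookup (purely illustrative; the class_synonyms name is mine and nothing below depends on it):

# illustrative mapping of interchangeable forest-class names (not used below)
class_synonyms <- c(dense = "established",  # both denote terra firma forest
                    swamp = "riverine",     # riverine = swamp = flooded forest
                    flooded = "riverine")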

—- PRELIMINARIES AND DATA UPLOADS —- Uploading all data from the “DATA” folder.

Loading libraries

# load necessary libraries (install any that are missing with install.packages())
library(ape)
library(car)
## Loading required package: carData
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following object is masked from 'package:ape':
## 
##     where
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
library(dunn.test)
library(FSA)
## Registered S3 methods overwritten by 'FSA':
##   method       from
##   confint.boot car 
##   hist.boot    car
## ## FSA v0.9.5. See citation('FSA') if used in publication.
## ## Run fishR() for related website and fishR('IFAR') for related book.
## 
## Attaching package: 'FSA'
## The following object is masked from 'package:car':
## 
##     bootCase
library(ForestGapR)
library(ForestTools)
library(foreach)
library(ggplot2)
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:ape':
## 
##     rotate
library(ggsignif)
library(lattice)
library(lidR)
library(multcompView)
library(raster)
## Loading required package: sp
## 
## Attaching package: 'raster'
## The following objects are masked from 'package:lidR':
## 
##     projection, projection<-
## The following object is masked from 'package:dplyr':
## 
##     select
library(readr)
library(RColorBrewer)
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
## 
## Attaching package: 'sf'
## The following object is masked from 'package:lidR':
## 
##     st_concave_hull
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Registered S3 method overwritten by 'spdep':
##   method   from
##   plot.mst ape
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:raster':
## 
##     extract
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::accumulate() masks foreach::accumulate()
## ✖ tidyr::extract()    masks raster::extract()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ dplyr::lag()        masks stats::lag()
## ✖ dplyr::recode()     masks car::recode()
## ✖ raster::select()    masks dplyr::select()
## ✖ purrr::some()       masks car::some()
## ✖ purrr::when()       masks foreach::when()
## ✖ dplyr::where()      masks ape::where()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(vegan)
## Loading required package: permute
## This is vegan 2.6-4
library(viridis)
## Loading required package: viridisLite

Upload MATLAB-processed data products and field data ready for analysis

setwd("/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/") # set working directory

# row expansion done in matlab  
PW <- read_csv("plot_w_WD.csv") # read in data on all expanded rows organized with matlab data_work.m
## Rows: 12142 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Plots1, Plots2, Plots3, Plots4
## dbl (8): Plots5, Plots6, Plots7, Plots8, Plots9, Plots10, Plots11, Plots12
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(PW) # examine data
## # A tibble: 6 × 12
##   Plots1 Plots2 Plots3 Plots4 Plots5 Plots6 Plots7 Plots8 Plots9 Plots10 Plots11
##   <chr>  <chr>  <chr>  <chr>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
## 1 S7     savan… S7NW   XAS         9      3      5      0      0       0   0.518
## 2 S7     savan… S7NW   XAS         9      3      5      0      0       0   0.518
## 3 S7     savan… S7NW   XAS         9      3      5      0      0       0   0.518
## 4 S7     savan… S7NW   XAS         9      3      5      0      0       0   0.518
## 5 S7     savan… S7NW   XAS         9      3      5      0      0       0   0.518
## 6 S7     savan… S7NW   XAS         9      3      5      0      0       0   0.518
## # ℹ 1 more variable: Plots12 <dbl>
PL <- read_csv("plot_L.csv") # read in data on large expanded rows organized with matlab data_work.m
## Rows: 226 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Large_tal1, Large_tal2, Large_tal3, Large_tal4
## dbl (4): Large_tal5, Large_tal6, Large_tal7, Large_tal8
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(PL) # examine data
## # A tibble: 6 × 8
##   Large_tal1 Large_tal2 Large_tal3 Large_tal4 Large_tal5 Large_tal6 Large_tal7
##   <chr>      <chr>      <chr>      <chr>           <dbl>      <dbl>      <dbl>
## 1 D1_1985    succession 1985NW     ZCM                 1          0      0.647
## 2 D1_1985    succession 1985NW     XCA                 2          0      0.799
## 3 D1_1985    succession 1985NW     XCA                 2          0      0.799
## 4 D1_1985    succession 1985NW     ZLA                 1          0      0.898
## 5 D1_1985    succession 1985NW     ZMM                 1          0      0.588
## 6 D1_1985    succession 1985SW     ZRW                 1          0      0.744
## # ℹ 1 more variable: Large_tal8 <dbl>
PM <- read_csv("plot_M.csv") # read in data on medium expanded rows organized with matlab data_work.m
## Rows: 3800 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Medium_tal1, Medium_tal2, Medium_tal3, Medium_tal4
## dbl (4): Medium_tal5, Medium_tal6, Medium_tal7, Medium_tal8
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(PM) # examine data
## # A tibble: 6 × 8
##   Medium_tal1 Medium_tal2 Medium_tal3 Medium_tal4 Medium_tal5 Medium_tal6
##   <chr>       <chr>       <chr>       <chr>             <dbl>       <dbl>
## 1 S7          savanna     S7NW        XAS                   5           0
## 2 S7          savanna     S7NW        XAS                   5           0
## 3 S7          savanna     S7NW        XAS                   5           0
## 4 S7          savanna     S7NW        XAS                   5           0
## 5 S7          savanna     S7NW        XAS                   5           0
## 6 S7          savanna     S7NW        XHA                   2           4
## # ℹ 2 more variables: Medium_tal7 <dbl>, Medium_tal8 <dbl>
PS <- read_csv("plot_S.csv") # read in data on small expanded rows organized with matlab data_work.m
## Rows: 8116 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Small_tal1, Small_tal2, Small_tal3, Small_tal4
## dbl (4): Small_tal5, Small_tal6, Small_tal7, Small_tal8
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(PS) # examine data
## # A tibble: 6 × 8
##   Small_tal1 Small_tal2 Small_tal3 Small_tal4 Small_tal5 Small_tal6 Small_tal7
##   <chr>      <chr>      <chr>      <chr>           <dbl>      <dbl>      <dbl>
## 1 S7         savanna    S7NW       XAS                 9          3      0.518
## 2 S7         savanna    S7NW       XAS                 9          3      0.518
## 3 S7         savanna    S7NW       XAS                 9          3      0.518
## 4 S7         savanna    S7NW       XAS                 9          3      0.518
## 5 S7         savanna    S7NW       XAS                 9          3      0.518
## 6 S7         savanna    S7NW       XAS                 9          3      0.518
## # ℹ 1 more variable: Small_tal8 <dbl>
FEPS <- read_csv("FEPS.csv") # read in the non-expanded data (all rows)
## New names:
## Rows: 1526 Columns: 11
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): Plot, Class, 10m_Subplot, Species dbl (6): C1, C2, C3, C4, C5, C6 lgl (1):
## ...11
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...11`
head(FEPS) # examine data
DBH <- read_csv("DBH.csv") # read in data of DBHs by plot
## Rows: 772 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Plot, Type, Sp
## dbl (1): DBH
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(DBH) # examine data
## # A tibble: 6 × 4
##   Plot  Type       Sp      DBH
##   <chr> <chr>      <chr> <dbl>
## 1 DU_1  succession ZRA    5.1 
## 2 DU_1  succession ZPH    8.43
## 3 DU_1  succession KIE    8   
## 4 DU_1  succession ZHU   44.5 
## 5 DU_1  succession ZPH    5   
## 6 DU_1  succession ZPC    7.1
MRes <- read_csv("MRes.csv") # read in non-spatial mar CHM data
## Rows: 417424 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Longitude, Latitude, CHM_Value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(MRes) # examine data
## # A tibble: 6 × 3
##   Longitude Latitude CHM_Value
##       <dbl>    <dbl>     <dbl>
## 1   527386.   85239.        26
## 2   527387.   85239.        25
## 3   527388.   85239.        26
## 4   527389.   85239.        27
## 5   527390.   85239.        27
## 6   527391.   85239.        28
RRes <- read_csv("RRes.csv") # read in non-spatial riv CHM data
## Rows: 416998 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Longitude, Latitude, CHM_Value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(RRes) # examine data
DRes <- read_csv("DRes.csv") # read in non-spatial den CHM data
## Rows: 417212 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): Longitude, Latitude, CHM_Value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(DRes) # examine data
SPDWD <- read_csv("SPWDFF.csv") # read in wood density data 
## Rows: 277 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Code, Name, HERB/WOODY, SAV/FOR-G/FOR-F/VILL/TM/BAI/PRIM/SEC/U, Dis...
## dbl (6): AVG_WOOD_D, VAR, STDV, RNG_LB, RNG_UB, N
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(SPDWD) # examine data
## # A tibble: 6 × 12
##   Code  Name          `HERB/WOODY` SAV/FOR-G/FOR-F/VILL…¹ Disperser (EL, GOR, …²
##   <chr> <chr>         <chr>        <chr>                  <chr>                 
## 1 XAS   Annona seneg… WOODY        SAV                    EL, GOR, MON, CH, BIRD
## 2 XHA   Hymenocardia… WOODY        SAV                    WIND                  
## 3 XBF   Bridelia fer… WOODY        SAV                    BIRD                  
## 4 XMA   Maprounea af… WOODY        SAV                    BIRD                  
## 5 XHD   Hyparrhenia … HERB         <NA>                   <NA>                  
## 6 XAC   Andropogon s… HERB         <NA>                   <NA>                  
## # ℹ abbreviated names: ¹​`SAV/FOR-G/FOR-F/VILL/TM/BAI/PRIM/SEC/U`,
## #   ²​`Disperser (EL, GOR, MON, CH, HOG, BIRD, US, WIND, ALL, CAP, U)`
## # ℹ 7 more variables: AVG_WOOD_D <dbl>, LEVEL <chr>, VAR <dbl>, STDV <dbl>,
## #   RNG_LB <dbl>, RNG_UB <dbl>, N <dbl>
SPDWDV <- read_csv("WDVar.csv") # read in wood density variance data
## New names:
## Rows: 277 Columns: 15
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): Code, Name, HERB/WOODY, LEVEL dbl (1): v lgl (10): ...6, ...7, ...8, ...9,
## ...10, ...11, ...12, ...13, ...14, ...15
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
head(SPDWDV) # examine data
## # A tibble: 6 × 15
##   Code  Name  `HERB/WOODY` LEVEL     v ...6  ...7  ...8  ...9  ...10 ...11 ...12
##   <chr> <chr> <chr>        <chr> <dbl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl> <lgl>
## 1 XAS   Anno… WOODY        GE     0.74 NA    NA    NA    NA    NA    NA    NA   
## 2 XHA   Hyme… WOODY        GE     0.74 NA    NA    NA    NA    NA    NA    NA   
## 3 XBF   Brid… WOODY        GE     0.74 NA    NA    NA    NA    NA    NA    NA   
## 4 XMA   Mapr… WOODY        GE     0.74 NA    NA    NA    NA    NA    NA    NA   
## 5 XHD   Hypa… HERB         <NA>  NA    NA    NA    NA    NA    NA    NA    NA   
## 6 XAC   Andr… HERB         <NA>  NA    NA    NA    NA    NA    NA    NA    NA   
## # ℹ 3 more variables: ...13 <lgl>, ...14 <lgl>, ...15 <lgl>
subD <- read_csv("subset_DBH.csv") # read in DBH data with matched wood densities
## Rows: 347 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Plot, Type, Sp
## dbl (2): DBH, WD
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(subD) # examine data
## # A tibble: 6 × 5
##   Plot  Type     Sp      DBH    WD
##   <chr> <chr>    <chr> <dbl> <dbl>
## 1 R5    riverine YTR     8.8 0.475
## 2 R5    riverine RGO    13.9 0.690
## 3 R5    riverine RNP     8.8 0.642
## 4 R5    riverine RDG     8.5 0.583
## 5 R5    riverine RDG     6   0.583
## 6 R5    riverine RDG    17.7 0.583
subset_DBH <- subD

SDBH <- read_csv("SDBH.csv") # read in DBH data with matched wood density and variance bounds
## Rows: 347 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Plot, Type, Sp
## dbl (5): DBH, WD, var, WD_ceil, WD_floor
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(SDBH) # examine data
## # A tibble: 6 × 8
##   Plot  Type     Sp      DBH    WD   var WD_ceil WD_floor
##   <chr> <chr>    <chr> <dbl> <dbl> <dbl>   <dbl>    <dbl>
## 1 R5    riverine YTR     8.8 0.475  0.74   0.598    0.351
## 2 R5    riverine RGO    13.9 0.690  0.74   0.870    0.511
## 3 R5    riverine RNP     8.8 0.642  0.74   0.809    0.475
## 4 R5    riverine RDG     8.5 0.583  0.34   0.968    0.198
## 5 R5    riverine RDG     6   0.583  0.34   0.968    0.198
## 6 R5    riverine RDG    17.7 0.583  0.34   0.968    0.198
SDBH
## # A tibble: 347 × 8
##    Plot  Type     Sp      DBH    WD   var WD_ceil WD_floor
##    <chr> <chr>    <chr> <dbl> <dbl> <dbl>   <dbl>    <dbl>
##  1 R5    riverine YTR     8.8 0.475  0.74   0.598    0.351
##  2 R5    riverine RGO    13.9 0.690  0.74   0.870    0.511
##  3 R5    riverine RNP     8.8 0.642  0.74   0.809    0.475
##  4 R5    riverine RDG     8.5 0.583  0.34   0.968    0.198
##  5 R5    riverine RDG     6   0.583  0.34   0.968    0.198
##  6 R5    riverine RDG    17.7 0.583  0.34   0.968    0.198
##  7 R5    riverine RNP    25.5 0.642  0.74   0.809    0.475
##  8 R5    riverine RDG     7.3 0.583  0.34   0.968    0.198
##  9 R5    riverine RPM    25.4 0.585  0.34   0.971    0.199
## 10 R5    riverine RBL    34.2 0.714  0.74   0.900    0.529
## # ℹ 337 more rows

Upload raster data

setwd("/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/") # set working directory

#### CHM ####
tif_path <- "CHMs.tif" # path
CHM <- raster(tif_path) # read in
#print(CHM) # basic information
#plot(CHM) # plot

#### LiDAR LAND CLASSIFICATION ####
tif_path <- "LiDAR_B_LC.tif" # path
LidLC <- raster(tif_path) # read in
crs(LidLC) <- crs(CHM) # assign the CHM's CRS (metadata only; no reprojection)
#print(LidLC) # basic information
#plot(LidLC) # plot

#### INTEGRATED GROUND CONTROL POINT AND LiDAR LAND CLASSIFICATION ####
tif_path <- "GCP_B_LC.tif" # path
GCPLC <- raster(tif_path) # read in
crs(GCPLC) <- crs(CHM) # assign the CHM's CRS (metadata only; no reprojection)
#print(GCPLC) # basic information
#plot(GCPLC) # plot

#### SPATIAL CLIPPED CHM FROM CLASSIFIED SECTIONS OF LiDAR ####
## Mar
CHM_MARs <- raster("CHM_clipped_mar_sub.tif") # read in
#print(CHM_MARs) # basic information
#plot(CHM_MARs) # plot

## Riv
CHM_RIVs <- raster("CHM_clipped_riv_sub.tif") # read in
#print(CHM_RIVs) # basic information
#plot(CHM_RIVs) # plot

## Den
CHM_DENs <- raster("CHM_clipped_den_sub.tif") # read in
#print(CHM_DENs) # basic information
#plot(CHM_DENs) # plot

#### LANDSAT MOSAIC RASTER + PRODUCTS AND BANDS OF INTEREST ####
# Load the mosaic raster in
RastMos <- raster("RastMosa.tif")
# Check the structure and information of the raster
#print(RastMos)

# Load the NDVI raster in
ndv <- raster("ndv.tif")
# Check the structure and information of the raster
#print(ndv)

# Load the band 1 raster in
B1 <- raster("Band1.tif")
# Check the structure and information of the raster
#print(B1)

# Load the band 2 raster in
B2 <- raster("Band2.tif")
# Check the structure and information of the raster
#print(B2)

# Load the band 3 raster in
B3 <- raster("Band3.tif")
# Check the structure and information of the raster
#print(B3)

# Load the band 4 raster in
B4 <- raster("Band4.tif")
# Check the structure and information of the raster
#print(B4)

# Load the band 5 raster in
B5 <- raster("Band5.tif")
# Check the structure and information of the raster
#print(B5)

# Load the band 6 raster in
B6 <- raster("Band6.tif")
# Check the structure and information of the raster
#print(B6)

# Load the band 7 raster in
B7 <- raster("Band7.tif")
# Check the structure and information of the raster
#print(B7)

# Load the band 8 raster in
B8 <- raster("Band8.tif")
# Check the structure and information of the raster
#print(B8)
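# NOTE: an equivalent, more compact way to load the eight bands is a single
# RasterStack (a sketch using the same Band1.tif ... Band8.tif files; not
# needed for the analysis below):
#bands <- stack(paste0("Band", 1:8, ".tif")) # one object, eight layers
#B1 <- bands[[1]] # individual layers can still be pulled out like this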

# Load the DTM elevation raster in
elev <- raster("Elev.tif")
# Check the structure and information of the raster
#print(elev)

# Load a lower-resolution CHM raster, reclassified for canopy gaps and averaged to 10 m x 10 m pixels to match the Landsat mosaic's resolution
TENmresGap <- raster("TenMrescgap.tif") # this is an intermediate raster -- not as necessary
# Check the structure and information of the raster
#print(TENmresGap)

# Load the C prediction raster multiplied by land classification number in
TimesLC <- raster("TimesLC.tif") # this is an intermediate raster -- not as necessary
# Check the structure and information of the raster
#print(TimesLC)

# Load the raster of only marantaceae-class C predictions in
C_onlyMAR <- raster("trempR1.tif")
# Check the structure and information of the raster
#print(C_onlyMAR)

# Load a second raster of marantaceae-class C predictions in (tempMC)
C_onlyMARC <- raster("tempMC.tif")
# Check the structure and information of the raster
#print(C_onlyMARC)

# Load a third raster of marantaceae-class C predictions in (tempMF)
C_onlyMARF <- raster("tempMF.tif")
# Check the structure and information of the raster
#print(C_onlyMARF)

#### CLIPPED RANDOM POLYGONS OF INTEREST ####
# Load the buffered points in
bufpts <- st_read("bps.shp")
## Reading layer `bps' from data source 
##   `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/bps.shp' using driver `ESRI Shapefile'
## Simple feature collection with 14 features and 3 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 479804.6 ymin: 56113.92 xmax: 528985.2 ymax: 84417.4
## Projected CRS: Undefined Cartesian SRS with unknown unit
# Print information about the loaded dataset
#print(bufpts)
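Note that bps.shp loads with an undefined CRS (see the st_read output above). If, and only if, it was digitized in the same WGS 84 / UTM zone 33N system as the other layers, the metadata could be set explicitly; a sketch to verify before relying on it:

#st_crs(bufpts) <- st_crs(32633) # EPSG 32633 = WGS 84 / UTM zone 33N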

Upload polygon and point data

setwd("/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/") # set working directory

#### GROUND SAMPLING PLOT POINTS ####
# spatial points data frame for the point coordinates collected and plotted in QGIS
F100 <- data.frame(x = 492806.13198, y = 66476.26328)
F110 <- data.frame(x = 492805.8004 , y = 66666.5964)
F200 <- data.frame(x = 479814.5850, y = 56123.9184)
F300 <- data.frame(x = 485718.2253, y = 58310.0686)
F330 <- data.frame(x = 485838.1697, y = 58052.0881)
M1 <- data.frame(x = 528655, y = 84129)
M2 <- data.frame(x = 528975.2202, y = 84294.3450)
M3 <- data.frame(x = 528565.3114, y = 84356.5495)
M4 <- data.frame(x = 528950.0673, y = 84405.4275)
M5 <- data.frame(x = 528417.1, y = 84407.4)
R20 <- data.frame(x = 487629.0835, y = 64719.7980)
R30 <- data.frame(x = 489559.1121, y = 68289.3393)
R4 <- data.frame(x = 489191.9280, y = 68426.5140)
R5 <- data.frame(x = 487602.9289, y = 64402.2447)
# specify each spatial point 
points <- lapply(list(F100, F110, F200, F300, F330, M1, M2, M3, M4, M5, R20, R30, R4, R5), function(df) {
  coords <- df[, c("x", "y")] # coordinates
  spatial_points <- SpatialPoints(coords)
  proj4string(spatial_points) <- CRS(proj4string(CHM)) # same coordinate reference system as CHM
  return(spatial_points) # return
})
# combine to spatial points dataframe
all_points <- rbind(F100, F110, F200, F300, F330, M1, M2, M3, M4, M5, R20, R30, R4, R5)
points_sp <- SpatialPointsDataFrame(coords = all_points[, c("x", "y")], data = all_points) # spatial coordinates
#print(points_sp) # basic information
#plot(points_sp) # plot
# to export the points
#sf_points <- st_as_sf(points_sp)
#shapefile_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/points_sp.shp"
#st_write(sf_points, dsn = shapefile_path)
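# NOTE: the same point layer can be built in one step with sf (a sketch using
# the all_points data frame above; not used downstream):
#sf_pts <- st_as_sf(all_points, coords = c("x", "y"), crs = projection(CHM))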

#### BUFFERED 20 x 20 M FIELD PLOTS ####
shp_path <- "BufferFieldPlots.geojson" #path 
Bfield <- st_read(shp_path) # read
## Reading layer `BufferFieldPlots' from data source 
##   `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/BufferFieldPlots.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 14 features and 2 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 479804.6 ymin: 56113.92 xmax: 528985.2 ymax: 84417.4
## Projected CRS: WGS 84 / UTM zone 33N
Bfield <- st_transform(Bfield, crs = crs(CHM)) # convert CRS
#print(Bfield) # basic information
#plot(Bfield$geometry) # plot

#### IMBALANGA POLYGON ####
shp_path <- "polyI.gpkg" #path 
clp <- st_read(shp_path) # read
## Reading layer `polyimb' from data source 
##   `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/polyI.gpkg' using driver `GPKG'
## Simple feature collection with 1 feature and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 526769.9 ymin: 83475.07 xmax: 529577.5 ymax: 86230.74
## Projected CRS: WGS 84 / UTM zone 33N
clp <- st_transform(clp, crs = crs(CHM)) # convert CRS
#print(clp) # basic information
#plot(clp) # plot

#### CHM POLYGON FOOTPRINT ####
shp_path <- "FOOTP.gpkg" #  path
FOOTP <- st_read(shp_path) # read 
## Reading layer `footrpint' from data source 
##   `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/FOOTP.gpkg' using driver `GPKG'
## Simple feature collection with 3 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 518521.2 ymin: 66491.91 xmax: 534650.9 ymax: 101275.4
## Projected CRS: WGS 84 / UTM zone 33N
FOOTP <- st_transform(FOOTP, crs = crs(CHM)) # convert CRS
#print(FOOTP) # info
#plot(FOOTP) # plot

#### SAMPLE POLYGONS ####
shp_path <- "PlotsRandom.gpkg" # path
Pltz <- st_read(shp_path) # read
## Reading layer `buffered' from data source 
##   `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/PlotsRandom.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 3000 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 518507.9 ymin: 66488.79 xmax: 534669.3 ymax: 101275.4
## Projected CRS: WGS 84 / UTM zone 33N
Pltz <- st_transform(Pltz, crs = crs(CHM)) # convert CRS
#print(Pltz) # info
#plot(Pltz) # plot

#### LAND CLASSIFICATION SELECTED POLYGONS ####
## Riv
RivSub <- st_read("RivSub.geojson") # read in
## Reading layer `RivSub' from data source 
##   `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/RivSub.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 849 features and 3 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 518858.8 ymin: 66662.76 xmax: 534661.8 ymax: 101269.8
## Geodetic CRS:  WGS 84
#print(RivSub) # basic information
#plot(RivSub) # plot

## Mar
MarSub <- st_read("MarSub.geojson") # read in
## Reading layer `MarSub' from data source 
##   `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/MarSub.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 849 features and 3 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 518664.6 ymin: 66522.39 xmax: 534652.3 ymax: 100187.2
## Geodetic CRS:  WGS 84
#print(MarSub) # basic information
#plot(MarSub) # plot

## Den
DenSub <- st_read("DenSub.geojson") # read in
## Reading layer `DenSub' from data source 
##   `/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/DenSub.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 849 features and 3 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 518512 ymin: 66489.62 xmax: 534503.6 ymax: 101250.4
## Geodetic CRS:  WGS 84
#print(DenSub) # basic information
#plot(DenSub) # plot

Info on color scheme and land classification number correspondence !DO NOT RUN!

# values, coloring, and classification all listed here
'darkseagreen2' # marantaceae = 1
## [1] "darkseagreen2"
'plum3' # dense = 9
## [1] "plum3"
'tan' # bai = 10
## [1] "tan"
'midnightblue' # water = 90
## [1] "midnightblue"
'chartreuse4' # riverine = 100
## [1] "chartreuse4"
'chocolate4' # savanna = 1000 
## [1] "chocolate4"
'gray9' # urban = 10000
## [1] "gray9"

—- FIELD DATA ANALYSIS —-

Rarefaction

#### GET DATA ####
# species richness calculated with the MATLAB alpha.m program -- values are cumulative counts of unique species per habitat, each plot adding its new species to the running total
dense <- c(36, 67, 89, 100, 105)
marantaceae <- c(11, 23, 27, 29, 30)
riverine <- c(24, 51, 69, 84, 0)  # riverine has only four plots; the fifth value is a placeholder so the cbind() below runs, and it is removed later and excluded from all analysis

# combine all data into a single matrix
all_data <- cbind(dense, marantaceae, riverine)

# create a vector of colors for lines and points for eventual plotting
all_line_colors <- c('plum3',  'darkseagreen2', 'chartreuse4')
all_point_colors <- c('plum3', 'darkseagreen2', 'chartreuse4')
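# NOTE: a rough R equivalent of the alpha.m cumulative richness calculation
# (a sketch only; it assumes FEPS's Class/Plot/Species columns describe the
# field plots and that alphabetical plot order matches the sampling order
# used in MATLAB, which may not hold):
#FEPS %>%
#  distinct(Class, Plot, Species) %>%
#  arrange(Class, Plot) %>%
#  group_by(Class) %>%
#  mutate(new_sp = !duplicated(Species)) %>%
#  group_by(Class, Plot) %>%
#  summarise(new_sp = sum(new_sp)) %>%   # new species added by each plot
#  mutate(cum_richness = cumsum(new_sp)) # running richness within Class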



#### FIT LOGISTIC GROWTH FUNCTION ####
# logistic growth function to fit to data points
logistic_growth <- function(x, a, b, c) {
  a / (1 + exp(-b * (x - c)))
}
# fit logistic growth model function and print results
fit_logistic_growth <- function(x_data, y_data, dataset_name, plot_color) {
  fit <- nls(y_data ~ logistic_growth(x_data, a, b, c),
             start = list(a = max(y_data), b = 1, c = mean(x_data)))

  coefficients <- coef(fit) # get coefficients
  cat("Fitted Coefficients for", dataset_name, ":\n", coefficients, "\n")

  #### PLOTTING INFORMATION ####
  
  # generate points for the fitted curve
  x_fit <- seq(min(x_data), 15, length.out = 100)
  y_fit <- predict(fit, newdata = data.frame(x_data = x_fit))

  # add the fitted logistic growth curve to the existing plot (x-axis extended to 15)
  lines(x_fit, y_fit, col = plot_color, lty = 1)

  # plot data points
  points(x_data, y_data, pch = 16, col = plot_color)
  
  # compute the R-squared value for the fit
  rsquared <- 1 - (sum(residuals(fit)^2) / sum((y_data - mean(y_data))^2))
  
  # calculate 1 SD of residuals
  sd_residuals <- sd(residuals(fit))
  
  # get information on the 1 standard deviation bounds
  asymptote <- coefficients["a"] # asymptote
  # get upper and lower bounds based on sd residuals
  upper_bound <- asymptote + sd_residuals 
  lower_bound <- asymptote - sd_residuals
  #print(asymptote) # view
  #print(sd_residuals) # view
  #print(upper_bound) # view
  #print(lower_bound) # view
  abline(h = asymptote, col = plot_color, lty = 2) # line at asymptote

  cat("R-squared value for", dataset_name, ":", rsquared, "\n")
  cat("Asymptote for", dataset_name, ":", asymptote, "\n\n")
}



#### PLOT ####

# x data for plotting
x_data <- c(1, 2, 3, 4, 5)
riverine <- c(24, 51, 69, 84) # riverine data without the placeholder value

# Set the file path where you want to save the PDF
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/raref.pdf"
# Open a PDF device
#pdf(pdf_path)

# Create a new plot
plot(x_data, dense, pch = 16, main = "Rarefaction", xlab = "Plot number", ylab = "Species richness", xlim = c(min(x_data), 15), ylim = c(0, 120))
fit_logistic_growth(x_data, dense, "dense", "plum3")
## Fitted Coefficients for dense :
##  106.4216 1.159431 1.565719 
## R-squared value for dense : 0.9996046 
## Asymptote for dense : 106.4216
fit_logistic_growth(x_data, marantaceae, "marantaceae", "darkseagreen2")
## Fitted Coefficients for marantaceae :
##  29.43998 1.672137 1.293561 
## R-squared value for marantaceae : 0.9943621 
## Asymptote for marantaceae : 29.43998
# riverine has only four plots, so use a four-value x vector
x_data_riverine <- c(1, 2, 3, 4)
fit_logistic_growth(x_data_riverine, riverine, "riverine", "chartreuse4")
## Fitted Coefficients for riverine :
##  90.41239 1.129562 1.849361 
## R-squared value for riverine : 0.995032 
## Asymptote for riverine : 90.41239
# add a vertical line at x = 4: sampling cutoff
abline(v = 4, col = "red", lty = 1)

# add legend with points and asymptotes, organized by forest type with bold subheadings and title
legend("right", legend = c(expression(bold("Dense forest")), "   Collected data points", "   Fitted curve", "   Asymptote", expression(bold("Marantaceae forest")), "   Collected data points", expression("   Fitted curve"), "   Asymptote", expression(bold("Riverine forest")), "   Collected data points", "   Fitted curve", "   Asymptote", expression(bold("Sampling cutoff"))), 
       col = c(NA, "plum3", "plum3", "plum3", NA, "darkseagreen2", "darkseagreen2", "darkseagreen2", NA, "chartreuse4", "chartreuse4", "chartreuse4", "red"), 
       lty = c(NA, NA, 1, 2, NA, NA, 1, 2, NA, NA, 1, 2, 1), 
       pch = c(16, 16, NA, NA, 16, 16, NA, NA, 16, 16, NA, NA, NA), 
       title = expression(bold("Legend")),
       lwd = c(NA, NA, 1, 1, NA, NA, 1, 1, NA, NA, 1, 1, 1),
       text.col = c("black", rep("black", 12)),
       cex = 0.8)

text(x = 9, y = 5, labels = "All R^2 values > 0.95", cex = 0.8, font = 2, col = "black") # R^2 informative text

#dev.off() #for saving

Alpha diversity species richness per plot

#### GET DATA ####
# calculations done in MATLAB plotalpha program

# create vectors of alpha diversity
established <- c(36, 29, 37, 48, 44)
marantaceae <- c(11, 2, 20, 3, 13)
riverine <- c(24, 29, 34, 36)



#### ANOVA ASSUMPTIONS ####
## test for normality
# Shapiro-Wilk test for normality: dense (established)
shapiro_test_resultDENSE <- shapiro.test(established)
# display the result
shapiro_test_resultDENSE
## 
##  Shapiro-Wilk normality test
## 
## data:  established
## W = 0.96876, p-value = 0.8673
# Shapiro-Wilk test for normality: marantaceae
shapiro_test_resultMAR <- shapiro.test(marantaceae)
# display the result
shapiro_test_resultMAR
## 
##  Shapiro-Wilk normality test
## 
## data:  marantaceae
## W = 0.92774, p-value = 0.5811
# Shapiro-Wilk test for normality: riverine
shapiro_test_resultRIV <- shapiro.test(riverine)
# display the result
shapiro_test_resultRIV
## 
##  Shapiro-Wilk normality test
## 
## data:  riverine
## W = 0.95015, p-value = 0.7171
## ALL p values > 0.05 so normality is assumed

## test variance between groups
# combine all groups into a single vector
all_data <- c(established, marantaceae, riverine)
# create a grouping variable
group <- rep(c("established", "marantaceae", "riverine"), 
             times = c(length(established), length(marantaceae), length(riverine)))
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(all_data, group)
# display the result
bartlett_test_result
## 
##  Bartlett test of homogeneity of variances
## 
## data:  all_data and group
## Bartlett's K-squared = 0.36474, df = 2, p-value = 0.8333
## ALL p-values > 0.05, so homogeneity of variances is assumed



#### PLOT SPECIES RICHNESS ####
# Set the file path where you want to save the PDF
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/alpha.pdf"
# Open a PDF device
#pdf(pdf_path)

# combine data into a list
data_list <- list(established, marantaceae, riverine)
# create boxplots
boxplot(data_list, names = c('dense', 'marantaceae', 'riverine'),
        col = c('plum3', 'darkseagreen2', 'chartreuse4'),
        ylab = 'Species richness', cex.lab = 1, cex.axis = 1,
        main = 'Species richness by plot')

# add a legend
legend('bottomleft', legend = c('dense', 'marantaceae', 'riverine'),
       col = c('plum3', 'darkseagreen2', 'chartreuse4'), pch = 15, cex = 1)

# Create a grouping variable
group <- rep(c("established", "marantaceae", "riverine"), 
             times = c(length(established), length(marantaceae), length(riverine)))



#### ANOVA ####
# perform ANOVA
anova_result <- aov(all_data ~ group)
# display the result
summary(anova_result)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## group        2 2221.4    1111   23.12 0.000115 ***
## Residuals   11  528.3      48                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# conduct Tukey's HSD post hoc test
tukey_result <- TukeyHSD(anova_result)
# display the result
print(tukey_result)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = all_data ~ group)
## 
## $group
##                           diff        lwr        upr     p adj
## marantaceae-established -29.00 -40.838483 -17.161517 0.0001028
## riverine-established     -8.05 -20.606608   4.506608 0.2373471
## riverine-marantaceae     20.95   8.393392  33.506608 0.0023539
# extract the letters
letters <- multcompLetters(tukey_result$group[, "p adj"])
# display the letters
#print(letters)
# marantaceae    riverine established 
#       "a"         "b"         "b" 
        
# add letters to box plots 
text(x = 1, y = 40, labels = "b", cex = .9, font = 2, col = "black")
text(x = 2, y = 8, labels = "a", cex = .9, font = 2, col = "black")
text(x = 3, y = 29, labels = "b", cex = .9, font = 2, col = "black")

#dev.off() #for saving

Alpha autocorrelation calculations

#### FORMAT DATA FOR FIRST INPUT OF MORAN'S I ####
# Code inspired by: https://stats.oarc.ucla.edu/r/faq/how-can-i-calculate-morans-i-in-r/
# spatial points data frame for each point
points <- lapply(list(F100, F110, F200, F300, F330, M1, M2, M3, M4, M5, R20, R30, R4, R5), function(df) {
  coords <- df[, c("x", "y")] # x and y coordinates from the data
  spatial_points <- SpatialPoints(coords) 
  proj4string(spatial_points) <- CRS(proj4string(CHM)) # assign the same CRS as CHM
  return(spatial_points)
})


all_points <- rbind(F100, F110, F200, F300, F330, M1, M2, M3, M4, M5, R20, R30, R4, R5) # combine spatial points
points_sp <- SpatialPointsDataFrame(coords = all_points[, c("x", "y")], data = all_points) # x/y mapping
#plot(points_sp) # plot



#### RUN MORAN'S I ####
# For mar: p < 0.05, spatial autocorrelation detected*
MAR <- all_points[6:10,] # subset for mar
MAR.dists <- as.matrix(dist(cbind(MAR$x, MAR$y))) # distance matrix
MAR.dists.inv <- 1/MAR.dists # inverse distance matrix
diag(MAR.dists.inv) <- 0 # diagonals are all 0
print(MAR.dists.inv) # view
##             1           2           3           4           5
## 1 0.000000000 0.002774783 0.004088524 0.002473272 0.002730743
## 2 0.002774783 0.000000000 0.002411953 0.008780044 0.001756063
## 3 0.004088524 0.002411953 0.000000000 0.002578329 0.006381946
## 4 0.002473272 0.008780044 0.002578329 0.000000000 0.001876275
## 5 0.002730743 0.001756063 0.006381946 0.001876275 0.000000000
Moran.I(marantaceae, MAR.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] 0.1427277
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.1979179
## 
## $p.value
## [1] 0.04722291
# For riv: no autocorrelation (p > 0.05)
RIV <- all_points[11:14,] # subset for riv
RIV.dists <- as.matrix(dist(cbind(RIV$x, RIV$y))) # distance matrix
RIV.dists.inv <- 1/RIV.dists # inverse distance matrix
diag(RIV.dists.inv) <- 0 # diagonals are all 0
print(RIV.dists.inv) # view
##              1            2            3            4
## 1 0.0000000000 0.0002464323 0.0002485884 0.0031384506
## 2 0.0002464323 0.0000000000 0.0025512108 0.0002298021
## 3 0.0002485884 0.0025512108 0.0000000000 0.0002311272
## 4 0.0031384506 0.0002298021 0.0002311272 0.0000000000
Moran.I(riverine, RIV.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] -0.8243662
## 
## $expected
## [1] -0.3333333
## 
## $sd
## [1] 0.5577275
## 
## $p.value
## [1] 0.3786333
# For dense: no autocorrelation (p > 0.05)
DEN <- all_points[1:5,] # subset for den
DEN.dists <- as.matrix(dist(cbind(DEN$x, DEN$y)))
DEN.dists.inv <- 1/DEN.dists # inverse distance matrix
diag(DEN.dists.inv) <- 0 # diagonals are all 0
print(DEN.dists.inv) # view
##              1            2            3            4            5
## 1 0.000000e+00 5.253938e-03 6.019823e-05 9.247962e-05 9.147051e-05
## 2 5.253938e-03 0.000000e+00 5.977000e-05 9.126227e-05 9.025593e-05
## 3 6.019823e-05 5.977000e-05 0.000000e+00 1.588458e-04 1.581111e-04
## 4 9.247962e-05 9.126227e-05 1.588458e-04 0.000000e+00 3.514931e-03
## 5 9.147051e-05 9.025593e-05 1.581111e-04 3.514931e-03 0.000000e+00
Moran.I(established, DEN.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] 0.5724514
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.4725076
## 
## $p.value
## [1] 0.08175199
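As an optional cross-check (a sketch, not part of the thesis workflow): since spdep is already loaded, the same inverse-distance weights can be wrapped in a listw object and passed to spdep's Moran test. Expect slightly different numbers than ape's Moran.I, because style = "W" row-standardizes the weights and moran.test uses the randomisation variance.

#lw_mar <- mat2listw(MAR.dists.inv, style = "W") # row-standardized neighbour weights
#moran.test(marantaceae, lw_mar) # spdep equivalent of the mar test above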

Wood density

#### STANDARDIZE NAMING ####
# change column names for PS (small trees)
colnames(PS) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
# change column names for PM (medium trees)
colnames(PM) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
# change column names for PL (large trees)
colnames(PL) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
PE <- PW # copy of all expanded rows
colnames(PE) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "H3", "H4", "H5","H6", "WD", "SUMS")

R <- list() # initialize
D <- list() # initialize
M <- list() # initialize

#### WOOD DENSITY BY HABITAT ####
subset_list <- list(PE, PL, PM, PS) # subsets in order: all, large, medium, small
SUBP <- list() # initialize new list

SaveNames <- c("/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/WD.pdf","/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/WDS.pdf","/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/WDM.pdf","/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/WDL.pdf") # for figure saving

for (i in seq_along(subset_list)) { # loop through the size subsets: all, large, medium, small
  P <- subset_list[[i]]
  indices <- which(P$hab %in% c("established", "marantaceae", "riverine")) # rows in the three forested habitats
  # subset P using those indices
  subset_P <- P[indices, ]

  # mar subset: per-plot mean wood density for the autocorrelation test
  subset_by_habitatM <- subset(subset_P, hab == "marantaceae")
  subset_by_habitatM1 <- subset(subset_by_habitatM, plot == "M1")
  avg_densityM1 <- mean(subset_by_habitatM1$WD)
  subset_by_habitatM2 <- subset(subset_by_habitatM, plot == "M2")
  avg_densityM2 <- mean(subset_by_habitatM2$WD)
  subset_by_habitatM3 <- subset(subset_by_habitatM, plot == "M3")
  avg_densityM3 <- mean(subset_by_habitatM3$WD)
  subset_by_habitatM4 <- subset(subset_by_habitatM, plot == "M4")
  avg_densityM4 <- mean(subset_by_habitatM4$WD)
  subset_by_habitatM5 <- subset(subset_by_habitatM, plot == "M5")
  avg_densityM5 <- mean(subset_by_habitatM5$WD)

  marantaceae <- c(avg_densityM1, avg_densityM2, avg_densityM3, avg_densityM4, avg_densityM5)

  M[[i]] <- marantaceae

  # den subset: per-plot mean wood density for the autocorrelation test
  subset_by_habitatD <- subset(subset_P, hab == "established")
  subset_by_habitatDF100 <- subset(subset_by_habitatD, plot == "F100")
  avg_densityDF100 <- mean(subset_by_habitatDF100$WD)
  subset_by_habitatDF110 <- subset(subset_by_habitatD, plot == "F110")
  avg_densityDF110 <- mean(subset_by_habitatDF110$WD)
  subset_by_habitatDF200 <- subset(subset_by_habitatD, plot == "F200")
  avg_densityDF200 <- mean(subset_by_habitatDF200$WD)
  subset_by_habitatDF300 <- subset(subset_by_habitatD, plot == "F300")
  avg_densityDF300 <- mean(subset_by_habitatDF300$WD)
  subset_by_habitatDF330 <- subset(subset_by_habitatD, plot == "F330")
  avg_densityDF330 <- mean(subset_by_habitatDF330$WD)

  established <- c(avg_densityDF100, avg_densityDF110, avg_densityDF200, avg_densityDF300, avg_densityDF330)

  D[[i]] <- established

  # riv subset: per-plot mean wood density for the autocorrelation test
  subset_by_habitatR <- subset(subset_P, hab == "riverine")
  subset_by_habitatR20 <- subset(subset_by_habitatR, plot == "R20")
  avg_densityR20 <- mean(subset_by_habitatR20$WD)
  subset_by_habitatR30 <- subset(subset_by_habitatR, plot == "R30")
  avg_densityR30 <- mean(subset_by_habitatR30$WD)
  subset_by_habitatR4 <- subset(subset_by_habitatR, plot == "R4")
  avg_densityR4 <- mean(subset_by_habitatR4$WD)
  subset_by_habitatR5 <- subset(subset_by_habitatR, plot == "R5")
  avg_densityR5 <- mean(subset_by_habitatR5$WD)

  riverine <- c(avg_densityR20, avg_densityR30, avg_densityR4, avg_densityR5) # combine

  R[[i]] <- riverine

  #pdf_path <- SaveNames[i]
  # Open a PDF device
  #pdf(pdf_path)

  # create a boxplot
  P_viz <- ggplot(subset_P, aes(x = hab, y = WD, fill = hab)) +
    geom_boxplot() +
    labs(x = 'Habitat type', y = 'Wood density', title = 'Tree wood density variations by habitat') +
    labs(fill = 'Habitat Type') +
    scale_fill_manual(values = c('plum3', 'darkseagreen2', 'chartreuse4')) +
    theme(text = element_text(size = 20)) +
    coord_cartesian(ylim = c(0, 1))

  plot(P_viz) # plot

  #dev.off() #for saving


  #### ANOVA ASSUMPTIONS ####
  ## test for normality
  # Shapiro-Wilk test for normality: riverine
  subset_riverine <- subset(subset_P, hab == "riverine")
  shapiro_test_resultRIV <- shapiro.test(subset_riverine$WD)
  # display the result
  print(shapiro_test_resultRIV)
  # Shapiro-Wilk test for normality: established
  subset_den <- subset(subset_P, hab == "established")
  shapiro_test_resultDEN <- shapiro.test(subset_den$WD)
  # display the result
  print(shapiro_test_resultDEN)
  # Shapiro-Wilk test for normality: marantaceae
  subset_mar <- subset(subset_P, hab == "marantaceae")
  shapiro_test_resultMAR <- shapiro.test(subset_mar$WD)
  # display the result
  print(shapiro_test_resultMAR)
  ## at least one p-value < 0.05, so normality is NOT assumed
  # and transformation does not make the data normal

  ## test variance between groups
  # perform Bartlett test for homogeneity of variances
  bartlett_test_result <- bartlett.test(subset_P$WD, subset_P$hab)
  # display the result
  print(bartlett_test_result)
  ## at least one p-value < 0.05, so homogeneity of variances is NOT assumed


  #### KRUSKAL-WALLIS TEST ####
  # Kruskal-Wallis test
  result <- kruskal.test(WD ~ hab, data = subset_P)
  print(result) # view

  # Dunn test for post hoc analysis
  posthoc_result <- dunn.test(subset_P$WD, subset_P$hab, method = "bonferroni")
  # the output shows which habitat pairs differ significantly in wood density
  SUBP[[i]] <- subset_P
}

## 
##  Shapiro-Wilk normality test
## 
## data:  subset_riverine$WD
## W = 0.93137, p-value < 2.2e-16
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_den$WD
## W = 0.94687, p-value < 2.2e-16
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_mar$WD
## W = 0.91345, p-value = 1.034e-11
## 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  subset_P$WD and subset_P$hab
## Bartlett's K-squared = 56.133, df = 2, p-value = 6.468e-13
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  WD by hab
## Kruskal-Wallis chi-squared = 377.75, df = 2, p-value < 2.2e-16
## 
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 377.7518, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |   establis   marantac
## ---------+----------------------
## marantac |   16.28680
##          |    0.0000*
##          |
## riverine |   13.05307  -9.749851
##          |    0.0000*    0.0000*
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2

## 
##  Shapiro-Wilk normality test
## 
## data:  subset_riverine$WD
## W = 0.93693, p-value = 0.009263
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_den$WD
## W = 0.93268, p-value = 0.0003053
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_mar$WD
## W = 0.92634, p-value = 0.1483
## 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  subset_P$WD and subset_P$hab
## Bartlett's K-squared = 24.363, df = 2, p-value = 5.123e-06
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  WD by hab
## Kruskal-Wallis chi-squared = 6.9189, df = 2, p-value = 0.03145
## 
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 6.9189, df = 2, p-value = 0.03
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |   establis   marantac
## ---------+----------------------
## marantac |   2.626357
##          |    0.0129*
##          |
## riverine |   0.839537  -1.929388
##          |     0.6018     0.0805
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2

## 
##  Shapiro-Wilk normality test
## 
## data:  subset_riverine$WD
## W = 0.91741, p-value < 2.2e-16
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_den$WD
## W = 0.93977, p-value < 2.2e-16
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_mar$WD
## W = 0.77253, p-value = 6.168e-06
## 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  subset_P$WD and subset_P$hab
## Bartlett's K-squared = 21.491, df = 2, p-value = 2.155e-05
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  WD by hab
## Kruskal-Wallis chi-squared = 135.93, df = 2, p-value < 2.2e-16
## 
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 135.9259, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |   establis   marantac
## ---------+----------------------
## marantac |   10.47243
##          |    0.0000*
##          |
## riverine |   6.564840  -8.520621
##          |    0.0000*    0.0000*
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2

## 
##  Shapiro-Wilk normality test
## 
## data:  subset_riverine$WD
## W = 0.92614, p-value < 2.2e-16
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_den$WD
## W = 0.92969, p-value < 2.2e-16
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_mar$WD
## W = 0.89989, p-value = 3.134e-11
## 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  subset_P$WD and subset_P$hab
## Bartlett's K-squared = 67.294, df = 2, p-value = 2.44e-15
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  WD by hab
## Kruskal-Wallis chi-squared = 323.64, df = 2, p-value < 2.2e-16
## 
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 323.6446, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |   establis   marantac
## ---------+----------------------
## marantac |   13.53082
##          |    0.0000*
##          |
## riverine |   13.71990  -5.638251
##          |    0.0000*    0.0000*
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
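# NOTE: the per-plot habitat means computed in the loop above can also be
# obtained with one dplyr call (a sketch; assumes the renamed plot/hab/WD
# columns and is not used downstream):
#subset_P %>%
#  group_by(hab, plot) %>%
#  summarise(mean_WD = mean(WD, na.rm = TRUE), .groups = "drop")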
#### WOOD DENSITY BY TREE HEIGHT ####
habtypes <- list("established", "marantaceae", "riverine") # list types of habitat

SaveNames <- c("/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/estWD.pdf","/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/marWD.pdf","/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/rivWD.pdf")

y_limits <- c(0, .9)

for (i in seq_along(habtypes)) { # loop through habitat types
  # subset small, medium, and large trees for this habitat
  subset_S <- subset(SUBP[[4]], hab == habtypes[[i]])
  subset_M <- subset(SUBP[[3]], hab == habtypes[[i]])
  subset_L <- subset(SUBP[[2]], hab == habtypes[[i]])
  # make sure the 7th column is named WD
  names(subset_S)[7] <- "WD"
  names(subset_M)[7] <- "WD"
  names(subset_L)[7] <- "WD"
  # combine the subsetted data
  combined_data <- bind_rows(
    mutate(subset_S, Size = "Small"),
    mutate(subset_M, Size = "Medium"),
    mutate(subset_L, Size = "Large")
  )
  

  #### CHECK ANOVA ASSUMPTIONS ####
  # Shapiro-Wilk test for normality: small
  subset_SM <- subset(combined_data, Size == "Small")
  shapiro_test_resultSM <- shapiro.test(subset_SM$WD)
  print(shapiro_test_resultSM)
  # Shapiro-Wilk test for normality: medium
  subset_MM <- subset(combined_data, Size == "Medium")
  shapiro_test_resultMM <- shapiro.test(subset_MM$WD)
  print(shapiro_test_resultMM)
  # Shapiro-Wilk test for normality: large
  subset_LM <- subset(combined_data, Size == "Large")
  shapiro_test_resultLM <- shapiro.test(subset_LM$WD)
  print(shapiro_test_resultLM)
  # most p-values < 0.05, so normality is not assumed

  ## test variance between groups
  # perform Bartlett test for homogeneity of variances
  bartlett_test_result <- bartlett.test(WD ~ Size, data = combined_data)
  # display the result
  print(bartlett_test_result)
  ## where p < 0.05, homogeneity of variances is NOT assumed... so KW test


  #### KRUSKAL-WALLIS TEST ####
  result <- kruskal.test(WD ~ Size, data = combined_data)
  print(result) # view
  # Dunn test for post hoc analysis
  posthoc_result <- dunn.test(combined_data$WD, combined_data$Size, method = "bonferroni")
  # which size classes differ significantly varies by habitat (see output below)
  print(posthoc_result)

  #pdf_path <- SaveNames[i]
# Open a PDF device
  #pdf(pdf_path)
  
  #### PLOT ####
  boxplot <- ggplot(combined_data, aes(x = Size, y = WD, fill = Size)) + # boxplot
    geom_boxplot() +
    labs(x = 'Size', y = 'Wood density', title = paste('Wood Density for', habtypes[[i]])) +
    scale_fill_manual(values = c('Small' = 'white', 'Medium' = 'orange2', 'Large' = 'tomato3')) +
    theme(text = element_text(size = 14))+
    ylim(y_limits)  # Set y-axis limits
  print(boxplot)
  #dev.off() #for saving
}
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_SM$WD
## W = 0.92969, p-value < 2.2e-16
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_MM$WD
## W = 0.93977, p-value < 2.2e-16
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_LM$WD
## W = 0.93268, p-value = 0.0003053
## 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  WD by Size
## Bartlett's K-squared = 5.6458, df = 2, p-value = 0.05943
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  WD by Size
## Kruskal-Wallis chi-squared = 4.498, df = 2, p-value = 0.1055
## 
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 4.498, df = 2, p-value = 0.11
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |      Large     Medium
## ---------+----------------------
##   Medium |  -2.089527
##          |     0.0550
##          |
##    Small |  -1.830495   0.911858
##          |     0.1008     0.5428
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
## $chi2
## [1] 4.498037
## 
## $Z
## [1] -2.0895271 -1.8304954  0.9118588
## 
## $P
## [1] 0.01833015 0.03358795 0.18092153
## 
## $P.adjusted
## [1] 0.05499045 0.10076384 0.54276459
## 
## $comparisons
## [1] "Large - Medium" "Large - Small"  "Medium - Small"
## Warning: Removed 87 rows containing non-finite values (`stat_boxplot()`).

## 
##  Shapiro-Wilk normality test
## 
## data:  subset_SM$WD
## W = 0.89989, p-value = 3.134e-11
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_MM$WD
## W = 0.77253, p-value = 6.168e-06
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_LM$WD
## W = 0.92634, p-value = 0.1483
## 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  WD by Size
## Bartlett's K-squared = 14.687, df = 2, p-value = 0.0006467
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  WD by Size
## Kruskal-Wallis chi-squared = 31.926, df = 2, p-value = 1.168e-07
## 
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 31.9257, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |      Large     Medium
## ---------+----------------------
##   Medium |   3.279640
##          |    0.0016*
##          |
##    Small |  -0.378235  -5.647023
##          |     1.0000    0.0000*
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
## $chi2
## [1] 31.92568
## 
## $Z
## [1]  3.2796409 -0.3782358 -5.6470235
## 
## $P
## [1] 5.196964e-04 3.526277e-01 8.162475e-09
## 
## $P.adjusted
## [1] 1.559089e-03 1.000000e+00 2.448743e-08
## 
## $comparisons
## [1] "Large - Medium" "Large - Small"  "Medium - Small"
## Warning: Removed 3 rows containing non-finite values (`stat_boxplot()`).

## 
##  Shapiro-Wilk normality test
## 
## data:  subset_SM$WD
## W = 0.92614, p-value < 2.2e-16
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_MM$WD
## W = 0.91741, p-value < 2.2e-16
## 
## 
##  Shapiro-Wilk normality test
## 
## data:  subset_LM$WD
## W = 0.93693, p-value = 0.009263
## 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  WD by Size
## Bartlett's K-squared = 69.981, df = 2, p-value = 6.365e-16
## 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  WD by Size
## Kruskal-Wallis chi-squared = 80.499, df = 2, p-value < 2.2e-16
## 
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 80.4987, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |      Large     Medium
## ---------+----------------------
##   Medium |  -1.790757
##          |     0.1100
##          |
##    Small |   1.246783   8.971701
##          |     0.3187    0.0000*
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
## $chi2
## [1] 80.49868
## 
## $Z
## [1] -1.790757  1.246784  8.971702
## 
## $P
## [1] 3.666614e-02 1.062384e-01 1.459839e-19
## 
## $P.adjusted
## [1] 1.099984e-01 3.187152e-01 4.379518e-19
## 
## $comparisons
## [1] "Large - Medium" "Large - Small"  "Medium - Small"
## Warning: Removed 25 rows containing non-finite values (`stat_boxplot()`).

#### AUTOCORRELATION ####
# no spatial autocorrelation found in any habitat type or size class:
# small
Moran.I(R[[1]], RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.8225437
## 
## $expected
## [1] -0.3333333
## 
## $sd
## [1] 0.4753479
## 
## $p.value
## [1] 0.3034031
Moran.I(M[[1]], MAR.dists.inv, scaled = TRUE)
## $observed
## [1] 0.006988331
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.2194327
## 
## $p.value
## [1] 0.2415391
Moran.I(D[[1]], DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.03501003
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.4437589
## 
## $p.value
## [1] 0.628049
# medium
Moran.I(R[[2]], RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.5243141
## 
## $expected
## [1] -0.3333333
## 
## $sd
## [1] 0.5006785
## 
## $p.value
## [1] 0.7028738
Moran.I(M[[2]], MAR.dists.inv, scaled = TRUE)
## $observed
## [1] 0.02121841
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.1574666
## 
## $p.value
## [1] 0.08499954
Moran.I(D[[2]], DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.55921
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.5461904
## 
## $p.value
## [1] 0.5713114
# large
Moran.I(R[[3]], RIV.dists.inv, scaled = TRUE)
## $observed
## [1] 0.3135492
## 
## $expected
## [1] -0.3333333
## 
## $sd
## [1] 0.4842379
## 
## $p.value
## [1] 0.1815893
Moran.I(M[[3]], MAR.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3999048
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.2138138
## 
## $p.value
## [1] 0.4832409
Moran.I(D[[3]], DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.07588092
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.4310849
## 
## $p.value
## [1] 0.6862796
# all
Moran.I(R[[4]], RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.625936
## 
## $expected
## [1] -0.3333333
## 
## $sd
## [1] 0.3669714
## 
## $p.value
## [1] 0.4252509
Moran.I(M[[4]], MAR.dists.inv, scaled = TRUE)
## $observed
## [1] -0.08207075
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.213484
## 
## $p.value
## [1] 0.4315085
Moran.I(D[[4]], DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.1395928
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.3458645
## 
## $p.value
## [1] 0.749559
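
The twelve tests above print one at a time; for a summary table they can be collected into a single data frame. A minimal sketch, assuming the R, M, and D lists and the inverse-distance matrices from the chunks above (the tidy_moran helper and size_labels vector are hypothetical names):

# hypothetical helper: run ape::Moran.I and return one tidy row of results
tidy_moran <- function(values, inv_dists, label) {
  res <- Moran.I(values, inv_dists, scaled = TRUE)
  data.frame(test = label, observed = res$observed, expected = res$expected,
             sd = res$sd, p.value = res$p.value)
}
size_labels <- c("small", "medium", "large", "all")
moran_table <- do.call(rbind, lapply(1:4, function(i) rbind(
  tidy_moran(R[[i]], RIV.dists.inv, paste("riverine", size_labels[i])),
  tidy_moran(M[[i]], MAR.dists.inv, paste("marantaceae", size_labels[i])),
  tidy_moran(D[[i]], DEN.dists.inv, paste("dense", size_labels[i])))))
moran_table # every p.value is above 0.05: no evidence of spatial autocorrelation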

Wood density variance/accuracy calculations

#### BY HEIGHT CLASS TREES VARIANCE ####
### ESTABLISHED
## small established
PS$var <- NA # initialize variance column
for (i in 1:nrow(PS)) { # loop through small trees rows
  hab_class <- PS[i,2] 
  if (hab_class == "established") {
    RowCode <- PS[i,4] # get species code
    SPoi <- RowCode$SP # get sp subset
    row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
    VVal <- SPDWDV$v[row_placement] # get the v value at the row number
    PS$var[i] <- VVal # add it in the PS dataframe
  }
}
MSv <- mean(PS$var[PS[,2] == "established"], na.rm = TRUE) # find mean variance
print(MSv)
## [1] 0.7301031
PP <- (PS$var[PS[,2] == "established"])
SSv <- sum(PP, na.rm = TRUE) # sum of variances for later calculation
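
The same row-by-row lookup recurs below for every height class and habitat type. As an aside, a single vectorized match() call gives the same result, assuming the species codes in SPDWDV$Code are unique; this is a sketch only (idx and var_vec are hypothetical names), since the loops below are what the analysis actually runs:

# vectorized equivalent of the loop above (illustrative only; does not alter PS)
idx <- PS[[2]] == "established"                       # rows in the habitat of interest
var_vec <- SPDWDV$v[match(PS[[4]][idx], SPDWDV$Code)] # look up v by species code
mean(var_vec, na.rm = TRUE)                           # same mean variance as MSv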


## medium established
PM$var <- NA # initialize variance column
for (i in 1:nrow(PM)) { # loop through medium trees rows
  hab_class <- PM[i,2]
  if (hab_class == "established") {
    RowCode <- PM[i,4] # get the code from the row
    SPoi <- RowCode$SP # get sp subset
    row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
    VVal <- SPDWDV$v[row_placement] # get the v value at the row number
    PM$var[i] <- VVal # add it to the var column in the PM dataframe
  }
}
MMv <- mean(PM$var[PM[,2] == "established"], na.rm = TRUE) # find mean variance
print(MMv)
## [1] 0.7292308
PP <- (PM$var[PM[,2] == "established"]) # select for type
SMv <- sum(PP, na.rm = TRUE) # sum of variances for later calculation


## large established
PL$var <- NA # initialize variance column
for (i in 1:nrow(PL)) { # loop through large trees rows
  hab_class <- PL[i,2]
  if (hab_class == "established") {
    RowCode <- PL[i,4] # get the code from the row
    SPoi <- RowCode$SP # get sp subset
    row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
    VVal <- SPDWDV$v[row_placement] # get the v value at the row number
    PL$var[i] <- VVal # add it to the var column in the PL dataframe
  }
}
MLv <- mean(PL$var[PL[,2] == "established"], na.rm = TRUE) # find mean variance
print(MLv)
## [1] 0.7633169
PP <- (PL$var[PL[,2] == "established"]) # select for type
SLv <- sum(PP, na.rm = TRUE) # sum of variances for later calculation

## for all established height classes
all <- c(PM$var[PM[,2] == "established"], PL$var[PL[,2] == "established"], PS$var[PS[,2] == "established"]) # combine
Estall <- mean(all, na.rm = TRUE) # mean
print(Estall) # view
## [1] 0.730695
### RIVERINE
## small riverine
for (i in 1:nrow(PS)) { # loop through small trees rows
  hab_class <- PS[i,2] 
  if (hab_class == "riverine") {
    RowCode <- PS[i,4] # get species code
    SPoi <- RowCode$SP # get sp subset
    row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
    VVal <- SPDWDV$v[row_placement] # get the v value at the row number
    PS$var[i] <- VVal # add it in the PS dataframe
  }
}
MSv <- mean(PS$var[PS[,2] == "riverine"], na.rm = TRUE) # find mean variance
print(MSv) # view
## [1] 0.6780073
PP <- (PS$var[PS[,2] == "riverine"]) # subset by type
SSr <- sum(PP, na.rm = TRUE) # sum of variances for later calculation


## medium riverine
for (i in 1:nrow(PM)) { # loop through medium trees rows
  hab_class <- PM[i,2]
  if (hab_class == "riverine") {
    RowCode <- PM[i,4] # get the code from the row
    SPoi <- RowCode$SP # get sp subset
    row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
    VVal <- SPDWDV$v[row_placement] # get the v value at the row number
    PM$var[i] <- VVal # add it to the var column in the PM dataframe
  }
}
MMv <- mean(PM$var[PM[,2] == "riverine"], na.rm = TRUE) # find mean variance
print(MMv)
## [1] 0.6596248
PP <- (PM$var[PM[,2] == "riverine"]) # subset by type
SMr <- sum(PP, na.rm = TRUE) # sum of variances for later calculation

## large riverine
for (i in 1:nrow(PL)) { # loop through large trees rows
  hab_class <- PL[i,2]
  if (hab_class == "riverine") {
    RowCode <- PL[i,4] # get the code from the row
    SPoi <- RowCode$SP # get sp subset
    row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
    VVal <- SPDWDV$v[row_placement] # get the v value at the row number
    PL$var[i] <- VVal # add it to the var column in the PL dataframe
  }
}
MLv <- mean(PL$var[PL[,2] == "riverine"], na.rm = TRUE) # find mean variance
print(MLv)
## [1] 0.6807627
PP <- (PL$var[PL[,2] == "riverine"]) # subset by type
SLr <- sum(PP, na.rm = TRUE) # sum of variances for later calculation

## for all riverine height classes
all <- c(PM$var[PM[,2] == "riverine"], PL$var[PL[,2] == "riverine"], PS$var[PS[,2] == "riverine"]) # combine
Estall <- mean(all, na.rm = TRUE) # mean
print(Estall) # view
## [1] 0.6703066
### MAR
## small marantaceae
for (i in 1:nrow(PS)) { # loop through small trees rows
  hab_class <- PS[i,2] 
  if (hab_class == "marantaceae") {
    RowCode <- PS[i,4] # get species code
    SPoi <- RowCode$SP # get sp subset
    row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
    VVal <- SPDWDV$v[row_placement] # get the v value at the row number
    PS$var[i] <- VVal # add it in the PS dataframe
  }
}
MSv <- mean(PS$var[PS[,2] == "marantaceae"], na.rm = TRUE) # find mean variance
print(MSv) # view
## [1] 0.8254952
PP <- (PS$var[PS[,2] == "marantaceae"]) # subset by type
SSm <- sum(PP, na.rm = TRUE) # sum of variances for later calculation


## medium marantaceae
#PM$var <- NA # initialize variance column
for (i in 1:nrow(PM)) { # loop through medium trees rows
  hab_class <- PM[i,2]
  if (hab_class == "marantaceae") {
    RowCode <- PM[i,4] # get the code from the row
    SPoi <- RowCode$SP # get sp subset
    row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
    VVal <- SPDWDV$v[row_placement] # get the v value at the row number
    PM$var[i] <- VVal # add it to the var column in the PM dataframe
  }
}
MMv <- mean(PM$var[PM[,2] == "marantaceae"], na.rm = TRUE) # find mean variance
print(MMv)
## [1] 0.7443657
PP <- (PM$var[PM[,2] == "marantaceae"]) # subset by type
SMm <- sum(PP, na.rm = TRUE) # sum of variances for later calculation

## large marantaceae
#PL$var <- NA # initialize variance column
for (i in 1:nrow(PL)) { # loop through large trees rows
  hab_class <- PL[i,2]
  if (hab_class == "marantaceae") {
    RowCode <- PL[i,4] # get the code from the row
    SPoi <- RowCode$SP # get sp subset
    row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV
    VVal <- SPDWDV$v[row_placement] # get the v value at the row number
    PL$var[i] <- VVal # add it to the var column in the PL dataframe
  }
}
MLv <- mean(PL$var[PL[,2] == "marantaceae"], na.rm = TRUE) # find mean variance
print(MLv)
## [1] 0.8331158
PP <- (PL$var[PL[,2] == "marantaceae"]) # subset by type
SLm <- sum(PP, na.rm = TRUE) # sum of variances for later calculation

## for all marantaceae
all <- c(PM$var[PM[,2] == "marantaceae"], PL$var[PL[,2] == "marantaceae"], PS$var[PS[,2] == "marantaceae"]) # combine
Estall <- mean(all, na.rm = TRUE) # mean
print(Estall) # view
## [1] 0.8159731
#### ALL HEIGHT CLASS TREES VARIANCE ####
## all small
small <- c(PS$var[PS[,2] == "marantaceae"], PS$var[PS[,2] == "established"], PS$var[PS[,2] == "riverine"]) # combine
Msmall <- mean(small, na.rm = TRUE) # mean
print(Msmall) # view
## [1] 0.7226849
## all medium
mid <- c(PM$var[PM[,2] == "marantaceae"], PM$var[PM[,2] == "established"], PM$var[PM[,2] == "riverine"]) # combine
Mmid <- mean(mid, na.rm = TRUE) # mean
print(Mmid) # view
## [1] 0.696713
# all large
lar <- c(PL$var[PL[,2] == "marantaceae"], PL$var[PL[,2] == "established"], PL$var[PL[,2] == "riverine"]) # combine
Mlar <- mean(lar, na.rm = TRUE) # mean
print(Mlar) # view
## [1] 0.7444667
## all habitats, all height classes
all <- c(PM$var[PM[,2] == "established"], PL$var[PL[,2] == "established"], PS$var[PS[,2] == "established"], PM$var[PM[,2] == "marantaceae"], PL$var[PL[,2] == "marantaceae"], PS$var[PS[,2] == "marantaceae"], PM$var[PM[,2] == "riverine"], PL$var[PL[,2] == "riverine"], PS$var[PS[,2] == "riverine"]) # combine
ALLall <- mean(all, na.rm = TRUE) # mean
print(ALLall) # view
## [1] 0.7158947

Beta diversity: NMDS

#### GET DATA ####
selected_habitats <- c("marantaceae", "riverine", "established")
# create a subset based on the selected habitats for all trees
subset_FEPS <- subset(FEPS, Class %in% selected_habitats)
PL[, c(5, 6)] <- lapply(PL[, c(5, 6)], function(x) ifelse(x >= 1, 1, x)) # convert counts of 1 or more to 1
# create a subset based on the selected habitats for large trees
subset_FEPSL <- subset(PL, hab %in% selected_habitats)
PM[, c(5, 6)] <- lapply(PM[, c(5, 6)], function(x) ifelse(x >= 1, 1, x)) # convert counts of 1 or more to 1
# create a subset based on the selected habitats for medium trees
subset_FEPSM <- subset(PM, hab %in% selected_habitats)
# Create a subset based on the selected habitats for small trees
PS[, c(5, 6)] <- lapply(PS[, c(5, 6)], function(x) ifelse(x >= 1, 1, x)) # convert counts of 1 or more to 1
subset_FEPSS <- subset(PS, hab %in% selected_habitats)

pdf_paths <- c(
  "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/nmds1.pdf",
  "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/nmds2.pdf",
  "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/nmds3.pdf",
  "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/nmds4.pdf"
) # paths to save in loop

#### CONVERT HEIGHT CLASS DATA ####
# sum across rows for columns 5 - 10
subset_FEPS$...11 <- rowSums(subset_FEPS[, 5:10], na.rm = TRUE)
# sum tally by unique species in plot
sum_by_plot_species <- aggregate(subset_FEPS[, 5:10], by = list(plotType = subset_FEPS$Plot, species = subset_FEPS$Species, habitat_type = subset_FEPS$Class), sum, na.rm = TRUE)

sum_by_plot_species$Total_C <- rowSums(sum_by_plot_species[, 4:9])
# unique species names as columns. one plot per row. habitat denotation then reshape the data/pivot the data
pivoted_data <- sum_by_plot_species %>%
  pivot_wider(
    id_cols = c("plotType", "habitat_type"),
    names_from = "species",
    values_from = "Total_C",
    values_fill = 0
  )

pivoted_data$plotType <- NULL

Singles_pivoted_data <- pivoted_data

#### REMOVE SINGLETONS ####
converted_data <- pivoted_data # to not overwrite pivoted data
# convert all numbers greater than 0 to 1 in columns 2 to the end
converted_data <- mutate_all(converted_data[, 2:ncol(converted_data)], ~ifelse(. > 0, 1, .))
# sum the columns
column_sums <- colSums(converted_data)
# identify columns to keep (where sum is greater than 1)
columns_to_keep <- names(column_sums[column_sums > 1])
pivoted_dataA <- pivoted_data[, c("habitat_type", columns_to_keep)]
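
A quick check of how aggressive the singleton filter is, sketched with the objects just created (column_sums holds the number of plots each species occurs in after the presence/absence conversion):

# count species dropped vs. kept by the singleton filter
sum(column_sums <= 1)   # species recorded in only one plot (dropped)
length(columns_to_keep) # species retained for ordination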



#### STANDARDIZE NAMING ####
# in case the renaming in earlier sections was skipped, the column names can be re-standardized here; not needed when the code is run sequentially
# Change column names for subset_FEPSL
#colnames(subset_FEPSL) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
# Change column names for subset_FEPSM
#colnames(subset_FEPSM) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")
# Change column names for subset_FEPSS
#colnames(subset_FEPSS) <- c("plot", "hab", "subplot", "SP", "HC1", "HC2", "WD", "SUMS")


#### CONVERT S M L DATA ####
# create a list of  subsets
subset_list <- list(subset_FEPSL, subset_FEPSM, subset_FEPSS)
# initialize empty list to store the results
pivoted_data_list <- list()

# loop through each subset
for (i in seq_along(subset_list)) {
  # perform the operations on each subset
  subset <- subset_list[[i]]
  subset$SUMS <- rowSums(subset[, 5:6], na.rm = TRUE)
  sum_by_plot_species <- aggregate(subset[, 5:6], by = list(plotType = subset$plot, species = subset$SP, habitat_type = subset$hab), sum, na.rm = TRUE)
  sum_by_plot_species$Total_C <- rowSums(sum_by_plot_species[, 4:5])
  pivoted_data <- sum_by_plot_species %>%
    pivot_wider(
      id_cols = c("plotType", "habitat_type"),
      names_from = "species",
      values_from = "Total_C",
      values_fill = 0
    )
  converted_data <- pivoted_data
  # convert all numbers greater than 0 to 1 in columns 2 to the end
  converted_data <- mutate_all(converted_data[, 2:ncol(converted_data)], ~ifelse(. > 0, 1, .))
  # sum the columns
  column_sums <- colSums(converted_data)
  # identify columns to keep (where sum is greater than 1)
  columns_to_keep <- names(column_sums[column_sums > 1])
  pivoted_data <- pivoted_data[, c(columns_to_keep)]
  # store the pivoted data in the list
  pivoted_data_list[[i]] <- pivoted_data
}

# assign each pivoted dataset to separate variables
pivoted_dataL <- pivoted_data_list[[1]]
pivoted_dataM <- pivoted_data_list[[2]]
pivoted_dataS <- pivoted_data_list[[3]]



#### DISTANCE MATRIX ####
# list of data frames
pivoted_data_list <- list(pivoted_dataA, pivoted_dataL, pivoted_dataM, pivoted_dataS)
# list to store distance matrices
DMAt_list <- list()

# loop through the list
for (i in seq_along(pivoted_data_list)) {
  # consolidate data
  consolidated_data <- pivoted_data_list[[i]] %>%
    group_by(habitat_type) %>%
    summarise_all(sum)
  # create distance matrix
  DMAt <- vegdist(consolidated_data[, -1], method = "bray")
  # store distance matrix in the list
  DMAt_list[[i]] <- as.matrix(DMAt)
}

# access distance matrices from the list
DMAt <- DMAt_list[[1]]  # for pivoted_data
DMAtL <- DMAt_list[[2]]  # for pivoted_dataL
DMAtM <- DMAt_list[[3]]  # for pivoted_dataM
DMAtS <- DMAt_list[[4]]  # for pivoted_dataS
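
For reference, the Bray-Curtis dissimilarity that vegdist computes is the sum of absolute count differences divided by the total count across both samples; a toy example with made-up counts:

# toy check of the Bray-Curtis formula: sum(|x - y|) / sum(x + y)
x <- c(6, 0, 3)
y <- c(2, 4, 3)
sum(abs(x - y)) / sum(x + y)          # (4 + 4 + 0) / 18 = 0.444
vegdist(rbind(x, y), method = "bray") # matches the manual value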

#### LOOP FOR STRESS TEST, perMANOVA, AND PLOT ####
# run on each DMAt and pivoted_data; results are printed on each iteration
NMDS_list <- list()
MANOVARES_list <- list()
for (i in 1:4) {
  pivoted_data <- pivoted_data_list[[i]]
  DMAt <- DMAt_list[[i]]

#### STRESS TEST ####
  set.seed(1) # set seed for replicability
  goeveg::dimcheckMDS(pivoted_data[ ,-1], k = 3) # check for 3: stress drops below the red dotted line. Convergence!
  goeveg::dimcheckMDS(pivoted_data[ ,-1], k = 2) # check for 2: stress drops below the red dotted line. Convergence!
  goeveg::dimcheckMDS(pivoted_data[ ,-1], k = 1) # check for 1: stress does not drop below the red dotted line
  # k = 1 doesn't converge, so k = 2 is used for all of the pivoted data types

  MMDS <- metaMDS(pivoted_data[ , -1], k = 2) # run MDS
  MMDS # view... stress = 0.08
  stressplot(MMDS) # view the stress plot; R^2 > 0.9 means the metaMDS analysis modeled the data well



#### perMANOVA ####
  perMANOVA <- adonis2(as.matrix(pivoted_data[ ,-1]) ~ as.matrix(pivoted_data[ ,1])) # run perMANOVA using as.matrix to make data work with adonis2
  perMANOVA # view test
  summary(perMANOVA) # view summary specifics
  # in NMDS space, there is a statistically significant separation by habitat type, as the p-value is < 0.05.
  MANOVARES_list[[i]] <- perMANOVA
  NMDS_list[[i]] <- MMDS



#### PLOT NMDS ####
# Plotting the NMDS solution
plot(MMDS, type = "n")  # Create an empty plot
text(MMDS, display = "species", cex = 0.7)  # Add species labels to the plot

# MMDS is NMDS output, splitting by habitat type
habitat_data <- data.frame(x = MMDS$points[, 1], y = MMDS$points[, 2], habitat_type = pivoted_data$habitat_type)
# matrix of NMDS coordinates
habitat_matrix <- subset(habitat_data, select = c("x", "y"))
# calculate euclidean distances
euclidean_distances <- dist(habitat_matrix)
# convert the distance object to a matrix
euclidean_matrix <- as.matrix(euclidean_distances)
# set row and column names
rownames(euclidean_matrix) <- habitat_data$habitat_type
colnames(euclidean_matrix) <- habitat_data$habitat_type
# plot heatmap
heatmap(euclidean_matrix, 
        Rowv = NA, Colv = NA,  # Turn off clustering
        col = colorRampPalette(c("white", "rosybrown1", "indianred", "brown", "firebrick4"))(20),  # color palette
        main = "Habitat Heatmap - Euclidean Distance")

plot(MMDS, type = "n")  # create an empty plot
text(MMDS, display = "species", cex = 0.7)  # add species labels to the plot

# plot species
ordiplot(MMDS, type = "n")  # create an empty plot
# add species labels to the plot
orditorp(MMDS, display = "species", col = "red", air = 0.01)
# add site labels to the plot with increased size
orditorp(MMDS, display = "sites", cex = 1.25, air = 0.01)

#pdf_path <- pdf_paths[i]
# Open a PDF device
#pdf(pdf_path)

# plot species and boundaries
 ordiplot(MMDS, type = "n")  # create an empty plot
 # add species labels to the plot in black
 orditorp(MMDS, display = "species", col = "black", air = 0.01)
 # add site labels to the plot with increased size and color code by treatment
orditorp(MMDS, display = "sites", cex = 2, air = 0.03, 
         col = ifelse(habitat_data$habitat_type == "riverine", "chartreuse4",
                      ifelse(habitat_data$habitat_type == "marantaceae", "darkseagreen3", "plum4")))

ordiellipse(MMDS, groups = habitat_data$habitat_type, kind = "se", conf = 0.95, col = c('plum3', 'darkseagreen2', 'chartreuse4')) # 95% confidence ellipse

legend("topright", legend = c("dense", "marantaceae", "riverine"),
       fill = c('plum3', 'darkseagreen2', 'chartreuse4'), title = "Habitat Type", cex = 0.8)

title("NMDS of tree species composition by habitat")

round(MMDS$stress, 4) # view stress

#dev.off() #for saving
}
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1943328 
## Run 1 stress 0.3524536 
## Run 2 stress 0.3929939 
## Run 3 stress 0.3840533 
## Run 4 stress 0.1956176 
## Run 5 stress 0.3772697 
## Run 6 stress 0.1927954 
## ... New best solution
## ... Procrustes: rmse 0.01524285  max resid 0.04367903 
## Run 7 stress 0.3252388 
## Run 8 stress 0.1943328 
## Run 9 stress 0.1943328 
## Run 10 stress 0.3113665 
## Run 11 stress 0.3930091 
## Run 12 stress 0.2464218 
## Run 13 stress 0.3776397 
## Run 14 stress 0.4592865 
## Run 15 stress 0.3650915 
## Run 16 stress 0.277236 
## Run 17 stress 0.2678002 
## Run 18 stress 0.2047404 
## Run 19 stress 0.350759 
## Run 20 stress 0.4564972 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: scale factor of the gradient < sfgrmin
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1124106 
## Run 1 stress 0.1112681 
## ... New best solution
## ... Procrustes: rmse 0.1158367  max resid 0.2738696 
## Run 2 stress 0.07798028 
## ... New best solution
## ... Procrustes: rmse 0.09716641  max resid 0.1875646 
## Run 3 stress 0.1124105 
## Run 4 stress 0.07798012 
## ... New best solution
## ... Procrustes: rmse 0.0005749526  max resid 0.001726034 
## ... Similar to previous best
## Run 5 stress 0.07798006 
## ... New best solution
## ... Procrustes: rmse 0.000263677  max resid 0.0007657745 
## ... Similar to previous best
## Run 6 stress 0.07798033 
## ... Procrustes: rmse 0.0004538146  max resid 0.001362611 
## ... Similar to previous best
## Run 7 stress 0.1143732 
## Run 8 stress 0.1112681 
## Run 9 stress 0.1096314 
## Run 10 stress 0.07798025 
## ... Procrustes: rmse 0.0003067676  max resid 0.0008992422 
## ... Similar to previous best
## Run 11 stress 0.1088575 
## Run 12 stress 0.07798006 
## ... Procrustes: rmse 0.0001170406  max resid 0.0003145423 
## ... Similar to previous best
## Run 13 stress 0.07798015 
## ... Procrustes: rmse 0.0002206639  max resid 0.000577346 
## ... Similar to previous best
## Run 14 stress 0.07798031 
## ... Procrustes: rmse 0.0003581765  max resid 0.001011074 
## ... Similar to previous best
## Run 15 stress 0.07798009 
## ... Procrustes: rmse 0.0001404872  max resid 0.0003738382 
## ... Similar to previous best
## Run 16 stress 0.1763144 
## Run 17 stress 0.1096313 
## Run 18 stress 0.1143729 
## Run 19 stress 0.1769718 
## Run 20 stress 0.07798005 
## ... New best solution
## ... Procrustes: rmse 0.0001325533  max resid 0.0002572146 
## ... Similar to previous best
## *** Best solution repeated 1 times
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.04100057 
## Run 1 stress 0.04099468 
## ... New best solution
## ... Procrustes: rmse 0.001852491  max resid 0.003446237 
## ... Similar to previous best
## Run 2 stress 0.04099474 
## ... Procrustes: rmse 6.576772e-05  max resid 0.0001243949 
## ... Similar to previous best
## Run 3 stress 0.0409948 
## ... Procrustes: rmse 0.001095018  max resid 0.002111007 
## ... Similar to previous best
## Run 4 stress 0.04099464 
## ... New best solution
## ... Procrustes: rmse 2.487539e-05  max resid 4.875293e-05 
## ... Similar to previous best
## Run 5 stress 0.05971032 
## Run 6 stress 0.04099477 
## ... Procrustes: rmse 0.001057967  max resid 0.002039177 
## ... Similar to previous best
## Run 7 stress 0.04099478 
## ... Procrustes: rmse 0.0009725434  max resid 0.001874584 
## ... Similar to previous best
## Run 8 stress 0.04099497 
## ... Procrustes: rmse 0.001182615  max resid 0.00227601 
## ... Similar to previous best
## Run 9 stress 0.06210231 
## Run 10 stress 0.04099456 
## ... New best solution
## ... Procrustes: rmse 0.0008879197  max resid 0.001704026 
## ... Similar to previous best
## Run 11 stress 0.04688513 
## Run 12 stress 0.05971075 
## Run 13 stress 0.04099456 
## ... Procrustes: rmse 5.290214e-05  max resid 0.0001465044 
## ... Similar to previous best
## Run 14 stress 0.05971014 
## Run 15 stress 0.04360502 
## Run 16 stress 0.04099766 
## ... Procrustes: rmse 0.001812851  max resid 0.003552028 
## ... Similar to previous best
## Run 17 stress 0.04099454 
## ... New best solution
## ... Procrustes: rmse 0.0004380337  max resid 0.0008285491 
## ... Similar to previous best
## Run 18 stress 0.04099468 
## ... Procrustes: rmse 0.0005272132  max resid 0.0009471409 
## ... Similar to previous best
## Run 19 stress 0.04100388 
## ... Procrustes: rmse 0.002706279  max resid 0.004962037 
## ... Similar to previous best
## Run 20 stress 0.04103947 
## ... Procrustes: rmse 0.005039201  max resid 0.008993202 
## *** Best solution repeated 3 times

## [1] 0.19279544 0.07798005 0.04099454
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1943328 
## Run 1 stress 0.413639 
## Run 2 stress 0.2900389 
## Run 3 stress 0.3245001 
## Run 4 stress 0.2755258 
## Run 5 stress 0.3728355 
## Run 6 stress 0.3594308 
## Run 7 stress 0.4538464 
## Run 8 stress 0.2067003 
## Run 9 stress 0.1943328 
## ... New best solution
## ... Procrustes: rmse 2.390165e-07  max resid 5.178136e-07 
## ... Similar to previous best
## Run 10 stress 0.4259043 
## Run 11 stress 0.1943328 
## ... Procrustes: rmse 1.919467e-07  max resid 4.291512e-07 
## ... Similar to previous best
## Run 12 stress 0.1950975 
## Run 13 stress 0.2077259 
## Run 14 stress 0.1950975 
## Run 15 stress 0.1942141 
## ... New best solution
## ... Procrustes: rmse 0.01688855  max resid 0.03546575 
## Run 16 stress 0.2125097 
## Run 17 stress 0.3740071 
## Run 18 stress 0.3536908 
## Run 19 stress 0.1942141 
## ... Procrustes: rmse 5.554985e-07  max resid 1.308264e-06 
## ... Similar to previous best
## Run 20 stress 0.3969587 
## *** Best solution repeated 1 times
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1124106 
## Run 1 stress 0.07798037 
## ... New best solution
## ... Procrustes: rmse 0.1287562  max resid 0.277331 
## Run 2 stress 0.07798009 
## ... New best solution
## ... Procrustes: rmse 0.000262116  max resid 0.00077837 
## ... Similar to previous best
## Run 3 stress 0.1155293 
## Run 4 stress 0.1155292 
## Run 5 stress 0.114373 
## Run 6 stress 0.1096313 
## Run 7 stress 0.07798037 
## ... Procrustes: rmse 0.0005842807  max resid 0.001751259 
## ... Similar to previous best
## Run 8 stress 0.1183322 
## Run 9 stress 0.07798006 
## ... New best solution
## ... Procrustes: rmse 0.0002678849  max resid 0.000789668 
## ... Similar to previous best
## Run 10 stress 0.07798013 
## ... Procrustes: rmse 0.000144215  max resid 0.0004079904 
## ... Similar to previous best
## Run 11 stress 0.1669167 
## Run 12 stress 0.07798005 
## ... New best solution
## ... Procrustes: rmse 4.568267e-05  max resid 0.0001215872 
## ... Similar to previous best
## Run 13 stress 0.1088575 
## Run 14 stress 0.1096314 
## Run 15 stress 0.0779804 
## ... Procrustes: rmse 0.0005210257  max resid 0.001566148 
## ... Similar to previous best
## Run 16 stress 0.07798029 
## ... Procrustes: rmse 0.0002778744  max resid 0.0008061927 
## ... Similar to previous best
## Run 17 stress 0.1143729 
## Run 18 stress 0.07798024 
## ... Procrustes: rmse 0.0003987837  max resid 0.001197606 
## ... Similar to previous best
## Run 19 stress 0.07798033 
## ... Procrustes: rmse 0.0003307509  max resid 0.0009848813 
## ... Similar to previous best
## Run 20 stress 0.1143729 
## *** Best solution repeated 5 times

## [1] 0.19421410 0.07798005
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1943328 
## Run 1 stress 0.4404568 
## Run 2 stress 0.5255438 
## Run 3 stress 0.3668296 
## Run 4 stress 0.2055386 
## Run 5 stress 0.480681 
## Run 6 stress 0.3414128 
## Run 7 stress 0.3324098 
## Run 8 stress 0.4754485 
## Run 9 stress 0.1950975 
## Run 10 stress 0.2529896 
## Run 11 stress 0.3658251 
## Run 12 stress 0.3717855 
## Run 13 stress 0.4750728 
## Run 14 stress 0.3001204 
## Run 15 stress 0.2749941 
## Run 16 stress 0.356021 
## Run 17 stress 0.3899074 
## Run 18 stress 0.2910376 
## Run 19 stress 0.2730635 
## Run 20 stress 0.4445379 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: scale factor of the gradient < sfgrmin

## [1] 0.1943328
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1124106 
## Run 1 stress 0.07798018 
## ... New best solution
## ... Procrustes: rmse 0.1287681  max resid 0.2774338 
## Run 2 stress 0.1669167 
## Run 3 stress 0.1155291 
## Run 4 stress 0.07798004 
## ... New best solution
## ... Procrustes: rmse 0.0003062804  max resid 0.0008895488 
## ... Similar to previous best
## Run 5 stress 0.1088575 
## Run 6 stress 0.114373 
## Run 7 stress 0.0779802 
## ... Procrustes: rmse 0.0002706103  max resid 0.0008052747 
## ... Similar to previous best
## Run 8 stress 0.114373 
## Run 9 stress 0.07798023 
## ... Procrustes: rmse 0.0003447398  max resid 0.001007648 
## ... Similar to previous best
## Run 10 stress 0.1143729 
## Run 11 stress 0.2305539 
## Run 12 stress 0.07798029 
## ... Procrustes: rmse 0.0003814663  max resid 0.001150712 
## ... Similar to previous best
## Run 13 stress 0.07798037 
## ... Procrustes: rmse 0.0004255913  max resid 0.001261226 
## ... Similar to previous best
## Run 14 stress 0.1088575 
## Run 15 stress 0.1155292 
## Run 16 stress 0.1143729 
## Run 17 stress 0.114373 
## Run 18 stress 0.1124105 
## Run 19 stress 0.0779802 
## ... Procrustes: rmse 0.000286567  max resid 0.000864704 
## ... Similar to previous best
## Run 20 stress 0.07798025 
## ... Procrustes: rmse 0.0003257858  max resid 0.000975932 
## ... Similar to previous best
## *** Best solution repeated 7 times

## Wisconsin double standardization
## Run 0 stress 0.1260786 
## Run 1 stress 0.1615005 
## Run 2 stress 0.1038977 
## ... New best solution
## ... Procrustes: rmse 0.1879968  max resid 0.4002379 
## Run 3 stress 0.1233545 
## Run 4 stress 0.1504371 
## Run 5 stress 0.105237 
## Run 6 stress 0.1198666 
## Run 7 stress 0.2173669 
## Run 8 stress 0.105883 
## Run 9 stress 0.1187794 
## Run 10 stress 0.1679179 
## Run 11 stress 0.1170822 
## Run 12 stress 0.1159565 
## Run 13 stress 0.1177415 
## Run 14 stress 0.1708731 
## Run 15 stress 0.1319013 
## Run 16 stress 0.1728781 
## Run 17 stress 0.1846129 
## Run 18 stress 0.1018311 
## ... New best solution
## ... Procrustes: rmse 0.1559028  max resid 0.2845061 
## Run 19 stress 0.2002936 
## Run 20 stress 0.1169588 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      6: stress ratio > sratmax
##     14: scale factor of the gradient < sfgrmin
## Wisconsin double standardization
## Run 0 stress 0.04638449 
## Run 1 stress 0.04647212 
## ... Procrustes: rmse 0.1455698  max resid 0.3689025 
## Run 2 stress 0.05525682 
## Run 3 stress 0.02883845 
## ... New best solution
## ... Procrustes: rmse 0.1666909  max resid 0.2844353 
## Run 4 stress 0.05359211 
## Run 5 stress 0.0331679 
## Run 6 stress 0.02437162 
## ... New best solution
## ... Procrustes: rmse 0.1838275  max resid 0.3439847 
## Run 7 stress 0.06154179 
## Run 8 stress 9.176149e-05 
## ... New best solution
## ... Procrustes: rmse 0.1938413  max resid 0.4466419 
## Run 9 stress 0.06354182 
## Run 10 stress 0.02421754 
## Run 11 stress 0.04343092 
## Run 12 stress 0.03841728 
## Run 13 stress 0.03421767 
## Run 14 stress 0.05551083 
## Run 15 stress 0.05034388 
## Run 16 stress 0.05466926 
## Run 17 stress 0.04216925 
## Run 18 stress 9.407029e-05 
## ... Procrustes: rmse 0.02903055  max resid 0.05612763 
## Run 19 stress 0.0317483 
## Run 20 stress 0.0453951 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      8: no. of iterations >= maxit
##      2: stress < smin
##     10: stress ratio > sratmax
## Warning in metaMDS(matrix, distance = distance, k = i, trymax = trymax, :
## stress is (nearly) zero: you may have insufficient data
## Wisconsin double standardization
## Run 0 stress 6.813294e-05 
## Run 1 stress 9.896408e-05 
## ... Procrustes: rmse 0.1637981  max resid 0.261745 
## Run 2 stress 8.862762e-05 
## ... Procrustes: rmse 0.1901985  max resid 0.3565592 
## Run 3 stress 9.452926e-05 
## ... Procrustes: rmse 0.2049385  max resid 0.3757502 
## Run 4 stress 9.394402e-05 
## ... Procrustes: rmse 0.184007  max resid 0.3149623 
## Run 5 stress 8.086832e-05 
## ... Procrustes: rmse 0.1848313  max resid 0.3283084 
## Run 6 stress 8.35827e-05 
## ... Procrustes: rmse 0.1828036  max resid 0.3145629 
## Run 7 stress 9.528138e-05 
## ... Procrustes: rmse 0.1987372  max resid 0.4399993 
## Run 8 stress 0.02473764 
## Run 9 stress 9.435425e-05 
## ... Procrustes: rmse 0.1528865  max resid 0.3130526 
## Run 10 stress 0.03405059 
## Run 11 stress 0.02230655 
## Run 12 stress 7.500288e-05 
## ... Procrustes: rmse 0.1824924  max resid 0.2831157 
## Run 13 stress 9.78121e-05 
## ... Procrustes: rmse 0.190123  max resid 0.4110007 
## Run 14 stress 0.000114756 
## ... Procrustes: rmse 0.1816033  max resid 0.2467262 
## Run 15 stress 9.79947e-05 
## ... Procrustes: rmse 0.1799444  max resid 0.3476988 
## Run 16 stress 8.86299e-05 
## ... Procrustes: rmse 0.1873537  max resid 0.3561717 
## Run 17 stress 0.0285121 
## Run 18 stress 9.501063e-05 
## ... Procrustes: rmse 0.1480927  max resid 0.3431173 
## Run 19 stress 0.01566551 
## Run 20 stress 9.629843e-05 
## ... Procrustes: rmse 0.1843928  max resid 0.3230523 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##      6: no. of iterations >= maxit
##     14: stress < smin
## Warning in metaMDS(matrix, distance = distance, k = i, trymax = trymax, :
## stress is (nearly) zero: you may have insufficient data

## [1] 1.018311e-01 9.176149e-05 6.813294e-05
## Wisconsin double standardization
## Run 0 stress 0.1260786 
## Run 1 stress 0.1007245 
## ... New best solution
## ... Procrustes: rmse 0.145201  max resid 0.2970199 
## Run 2 stress 0.1295787 
## Run 3 stress 0.1721973 
## Run 4 stress 0.1519206 
## Run 5 stress 0.1731893 
## Run 6 stress 0.1357672 
## Run 7 stress 0.152639 
## Run 8 stress 0.1237353 
## Run 9 stress 0.1130331 
## Run 10 stress 0.1311712 
## Run 11 stress 0.1832615 
## Run 12 stress 0.1264218 
## Run 13 stress 0.103457 
## Run 14 stress 0.1080866 
## Run 15 stress 0.1407359 
## Run 16 stress 0.1805179 
## Run 17 stress 0.1127234 
## Run 18 stress 0.1557465 
## Run 19 stress 0.1287269 
## Run 20 stress 0.112385 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     11: stress ratio > sratmax
##      9: scale factor of the gradient < sfgrmin
## Wisconsin double standardization
## Run 0 stress 0.04638449 
## Run 1 stress 0.05477751 
## Run 2 stress 0.02055709 
## ... New best solution
## ... Procrustes: rmse 0.1676508  max resid 0.3173443 
## Run 3 stress 0.03542245 
## Run 4 stress 0.06339609 
## Run 5 stress 0.06354552 
## Run 6 stress 0.03434067 
## Run 7 stress 0.04904367 
## Run 8 stress 0.02427022 
## Run 9 stress 0.04943973 
## Run 10 stress 0.07217386 
## Run 11 stress 0.06614757 
## Run 12 stress 0.01402032 
## ... New best solution
## ... Procrustes: rmse 0.1466311  max resid 0.3708759 
## Run 13 stress 9.758908e-05 
## ... New best solution
## ... Procrustes: rmse 0.1142528  max resid 0.1897005 
## Run 14 stress 0.02570813 
## Run 15 stress 0.08477442 
## Run 16 stress 0.05823125 
## Run 17 stress 0.04232931 
## Run 18 stress 0.07039224 
## Run 19 stress 0.03858976 
## Run 20 stress 0.04583004 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     10: no. of iterations >= maxit
##      1: stress < smin
##      9: stress ratio > sratmax
## Warning in metaMDS(matrix, distance = distance, k = i, trymax = trymax, :
## stress is (nearly) zero: you may have insufficient data

## [1] 1.007245e-01 9.758908e-05
## Wisconsin double standardization
## Run 0 stress 0.1260786 
## Run 1 stress 0.203343 
## Run 2 stress 0.1388219 
## Run 3 stress 0.1214044 
## ... New best solution
## ... Procrustes: rmse 0.2570448  max resid 0.4814823 
## Run 4 stress 0.1350864 
## Run 5 stress 0.1820451 
## Run 6 stress 0.1188754 
## ... New best solution
## ... Procrustes: rmse 0.2330044  max resid 0.4846903 
## Run 7 stress 0.1586272 
## Run 8 stress 0.1369796 
## Run 9 stress 0.1141689 
## ... New best solution
## ... Procrustes: rmse 0.2623281  max resid 0.4492254 
## Run 10 stress 0.1506308 
## Run 11 stress 0.1162774 
## Run 12 stress 0.1342414 
## Run 13 stress 0.1556626 
## Run 14 stress 0.1720211 
## Run 15 stress 0.1431112 
## Run 16 stress 0.1525006 
## Run 17 stress 0.1312591 
## Run 18 stress 0.162248 
## Run 19 stress 0.1646557 
## Run 20 stress 0.1904715 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     10: stress ratio > sratmax
##     10: scale factor of the gradient < sfgrmin
## [1] 0.1141689
## Wisconsin double standardization
## Run 0 stress 0.04638449 
## Run 1 stress 0.04644459 
## ... Procrustes: rmse 0.2056431  max resid 0.3306612 
## Run 2 stress 0.05161009 
## Run 3 stress 0.06929451 
## Run 4 stress 0.03852015 
## ... New best solution
## ... Procrustes: rmse 0.1781842  max resid 0.3840539 
## Run 5 stress 0.02432429 
## ... New best solution
## ... Procrustes: rmse 0.1581082  max resid 0.2960934 
## Run 6 stress 0.03481109 
## Run 7 stress 0.0367112 
## Run 8 stress 0.03387116 
## Run 9 stress 0.05687228 
## Run 10 stress 0.05563853 
## Run 11 stress 0.02441238 
## ... Procrustes: rmse 0.08048097  max resid 0.2393151 
## Run 12 stress 0.05035522 
## Run 13 stress 0.03009459 
## Run 14 stress 0.06921252 
## Run 15 stress 8.93295e-05 
## ... New best solution
## ... Procrustes: rmse 0.1347681  max resid 0.3516269 
## Run 16 stress 0.02054638 
## Run 17 stress 0.02660606 
## Run 18 stress 0.05745513 
## Run 19 stress 9.311642e-05 
## ... Procrustes: rmse 0.1064271  max resid 0.1929925 
## Run 20 stress 0.03848708 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     11: no. of iterations >= maxit
##      2: stress < smin
##      7: stress ratio > sratmax
## Warning in metaMDS(pivoted_data[, -1], k = 2): stress is (nearly) zero: you may
## have insufficient data

## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.107638 
## Run 1 stress 0.195674 
## Run 2 stress 0.1113015 
## Run 3 stress 0.09468634 
## ... New best solution
## ... Procrustes: rmse 0.1036155  max resid 0.2466268 
## Run 4 stress 0.09758271 
## Run 5 stress 0.09691804 
## Run 6 stress 0.08986221 
## ... New best solution
## ... Procrustes: rmse 0.05151907  max resid 0.1107482 
## Run 7 stress 0.2106312 
## Run 8 stress 0.1311567 
## Run 9 stress 0.1430587 
## Run 10 stress 0.2238203 
## Run 11 stress 0.1733678 
## Run 12 stress 0.2075548 
## Run 13 stress 0.09273886 
## Run 14 stress 0.105575 
## Run 15 stress 0.1260755 
## Run 16 stress 0.1308612 
## Run 17 stress 0.095987 
## Run 18 stress 0.1015187 
## Run 19 stress 0.2049858 
## Run 20 stress 0.1026213 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     12: stress ratio > sratmax
##      8: scale factor of the gradient < sfgrmin
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.03287211 
## Run 1 stress 0.02706117 
## ... New best solution
## ... Procrustes: rmse 0.1025107  max resid 0.1758561 
## Run 2 stress 0.05732119 
## Run 3 stress 0.03000261 
## Run 4 stress 0.0295881 
## Run 5 stress 0.02644087 
## ... New best solution
## ... Procrustes: rmse 0.0359606  max resid 0.09495919 
## Run 6 stress 0.04956967 
## Run 7 stress 0.04956518 
## Run 8 stress 0.06205091 
## Run 9 stress 0.02801466 
## Run 10 stress 0.06000092 
## Run 11 stress 0.06726644 
## Run 12 stress 0.02936456 
## Run 13 stress 0.03064426 
## Run 14 stress 0.03642475 
## Run 15 stress 0.03341343 
## Run 16 stress 0.02697461 
## Run 17 stress 0.02876596 
## Run 18 stress 0.02889734 
## Run 19 stress 0.02966154 
## Run 20 stress 0.02847138 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     17: no. of iterations >= maxit
##      3: stress ratio > sratmax
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.01422236 
## Run 1 stress 0.0160425 
## Run 2 stress 0.01184598 
## ... New best solution
## ... Procrustes: rmse 0.05937423  max resid 0.108933 
## Run 3 stress 0.01969966 
## Run 4 stress 0.04573324 
## Run 5 stress 0.0123984 
## Run 6 stress 0.01201006 
## ... Procrustes: rmse 0.1164808  max resid 0.2582575 
## Run 7 stress 0.01154864 
## ... New best solution
## ... Procrustes: rmse 0.1362879  max resid 0.2768663 
## Run 8 stress 0.01276063 
## Run 9 stress 0.01248348 
## Run 10 stress 0.01201496 
## ... Procrustes: rmse 0.07089972  max resid 0.1165253 
## Run 11 stress 0.01484572 
## Run 12 stress 0.01522506 
## Run 13 stress 0.01481153 
## Run 14 stress 0.0116678 
## ... Procrustes: rmse 0.06632489  max resid 0.1605576 
## Run 15 stress 0.05173728 
## Run 16 stress 0.01325485 
## Run 17 stress 0.01233749 
## Run 18 stress 0.01558809 
## Run 19 stress 0.01179447 
## ... Procrustes: rmse 0.1010405  max resid 0.2421525 
## Run 20 stress 0.0303378 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: no. of iterations >= maxit

## [1] 0.08986221 0.02644087 0.01154864
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.107638 
## Run 1 stress 0.1498635 
## Run 2 stress 0.1913051 
## Run 3 stress 0.09253764 
## ... New best solution
## ... Procrustes: rmse 0.1387214  max resid 0.304312 
## Run 4 stress 0.1820671 
## Run 5 stress 0.09392184 
## Run 6 stress 0.2160058 
## Run 7 stress 0.1911391 
## Run 8 stress 0.1366461 
## Run 9 stress 0.1597226 
## Run 10 stress 0.1255343 
## Run 11 stress 0.09459803 
## Run 12 stress 0.0921741 
## ... New best solution
## ... Procrustes: rmse 0.1287216  max resid 0.3800495 
## Run 13 stress 0.1178513 
## Run 14 stress 0.1103657 
## Run 15 stress 0.1054541 
## Run 16 stress 0.1288656 
## Run 17 stress 0.1005663 
## Run 18 stress 0.1019884 
## Run 19 stress 0.09930534 
## Run 20 stress 0.1037488 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     14: stress ratio > sratmax
##      6: scale factor of the gradient < sfgrmin
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.03287211 
## Run 1 stress 0.05302126 
## Run 2 stress 0.04963663 
## Run 3 stress 0.02931672 
## ... New best solution
## ... Procrustes: rmse 0.1355068  max resid 0.2518441 
## Run 4 stress 0.04962142 
## Run 5 stress 0.03662982 
## Run 6 stress 0.05260794 
## Run 7 stress 0.03670526 
## Run 8 stress 0.0347045 
## Run 9 stress 0.05302348 
## Run 10 stress 0.04163794 
## Run 11 stress 0.05732297 
## Run 12 stress 0.02918001 
## ... New best solution
## ... Procrustes: rmse 0.1595063  max resid 0.2450427 
## Run 13 stress 0.02684856 
## ... New best solution
## ... Procrustes: rmse 0.1633937  max resid 0.2934725 
## Run 14 stress 0.03044057 
## Run 15 stress 0.05805467 
## Run 16 stress 0.04962124 
## Run 17 stress 0.05755521 
## Run 18 stress 0.02944696 
## Run 19 stress 0.05374777 
## Run 20 stress 0.05732111 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     13: no. of iterations >= maxit
##      7: stress ratio > sratmax

## [1] 0.09217410 0.02684856
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.107638 
## Run 1 stress 0.09495336 
## ... New best solution
## ... Procrustes: rmse 0.08284395  max resid 0.151722 
## Run 2 stress 0.1013772 
## Run 3 stress 0.1409984 
## Run 4 stress 0.0964131 
## Run 5 stress 0.09409631 
## ... New best solution
## ... Procrustes: rmse 0.04198279  max resid 0.09182676 
## Run 6 stress 0.1092166 
## Run 7 stress 0.2059908 
## Run 8 stress 0.1428996 
## Run 9 stress 0.08433692 
## ... New best solution
## ... Procrustes: rmse 0.1095314  max resid 0.3287707 
## Run 10 stress 0.09379732 
## Run 11 stress 0.1100792 
## Run 12 stress 0.09042387 
## Run 13 stress 0.08936437 
## Run 14 stress 0.09345407 
## Run 15 stress 0.1122311 
## Run 16 stress 0.09341088 
## Run 17 stress 0.1885429 
## Run 18 stress 0.2051392 
## Run 19 stress 0.1665594 
## Run 20 stress 0.09994648 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     14: stress ratio > sratmax
##      6: scale factor of the gradient < sfgrmin

## [1] 0.08433692
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.03287211 
## Run 1 stress 0.0495648 
## Run 2 stress 0.0283978 
## ... New best solution
## ... Procrustes: rmse 0.1506162  max resid 0.4143981 
## Run 3 stress 0.04399708 
## Run 4 stress 0.02746676 
## ... New best solution
## ... Procrustes: rmse 0.1866391  max resid 0.5099999 
## Run 5 stress 0.02644227 
## ... New best solution
## ... Procrustes: rmse 0.1349  max resid 0.295937 
## Run 6 stress 0.03485698 
## Run 7 stress 0.03001853 
## Run 8 stress 0.02681662 
## ... Procrustes: rmse 0.1186172  max resid 0.2612048 
## Run 9 stress 0.04406046 
## Run 10 stress 0.06000121 
## Run 11 stress 0.05302191 
## Run 12 stress 0.02764933 
## Run 13 stress 0.03554706 
## Run 14 stress 0.04956439 
## Run 15 stress 0.04956494 
## Run 16 stress 0.02674716 
## ... Procrustes: rmse 0.1196372  max resid 0.2753668 
## Run 17 stress 0.04956518 
## Run 18 stress 0.03686226 
## Run 19 stress 0.02876901 
## Run 20 stress 0.02876359 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     15: no. of iterations >= maxit
##      5: stress ratio > sratmax

## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2432883 
## Run 1 stress 0.2005041 
## ... New best solution
## ... Procrustes: rmse 0.1797811  max resid 0.4073555 
## Run 2 stress 0.1492792 
## ... New best solution
## ... Procrustes: rmse 0.09066636  max resid 0.2187461 
## Run 3 stress 0.1982219 
## Run 4 stress 0.275381 
## Run 5 stress 0.2736742 
## Run 6 stress 0.2763352 
## Run 7 stress 0.2014731 
## Run 8 stress 0.2099927 
## Run 9 stress 0.2563287 
## Run 10 stress 0.287374 
## Run 11 stress 0.1414606 
## ... New best solution
## ... Procrustes: rmse 0.02879065  max resid 0.05849106 
## Run 12 stress 0.2872084 
## Run 13 stress 0.2777393 
## Run 14 stress 0.2013436 
## Run 15 stress 0.1444795 
## Run 16 stress 0.1875126 
## Run 17 stress 0.184394 
## Run 18 stress 0.217751 
## Run 19 stress 0.1955774 
## Run 20 stress 0.2097698 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: scale factor of the gradient < sfgrmin
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.06870377 
## Run 1 stress 0.06870354 
## ... New best solution
## ... Procrustes: rmse 0.0001440208  max resid 0.0004140217 
## ... Similar to previous best
## Run 2 stress 0.06596665 
## ... New best solution
## ... Procrustes: rmse 0.02617494  max resid 0.06023465 
## Run 3 stress 0.07348368 
## Run 4 stress 0.06870362 
## Run 5 stress 0.06870361 
## Run 6 stress 0.1069169 
## Run 7 stress 0.06596584 
## ... New best solution
## ... Procrustes: rmse 0.0006891548  max resid 0.002076878 
## ... Similar to previous best
## Run 8 stress 0.07990749 
## Run 9 stress 0.07990756 
## Run 10 stress 0.07990764 
## Run 11 stress 0.06596685 
## ... Procrustes: rmse 0.002113794  max resid 0.006364691 
## ... Similar to previous best
## Run 12 stress 0.06870334 
## Run 13 stress 0.07348402 
## Run 14 stress 0.0734835 
## Run 15 stress 0.07990781 
## Run 16 stress 0.104344 
## Run 17 stress 0.07990768 
## Run 18 stress 0.06596699 
## ... Procrustes: rmse 0.0008321829  max resid 0.002505762 
## ... Similar to previous best
## Run 19 stress 0.09258065 
## Run 20 stress 0.1570495 
## *** Best solution repeated 3 times
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.02275913 
## Run 1 stress 0.02212176 
## ... New best solution
## ... Procrustes: rmse 0.1038378  max resid 0.2945243 
## Run 2 stress 0.02221842 
## ... Procrustes: rmse 0.09055185  max resid 0.2602679 
## Run 3 stress 0.02178296 
## ... New best solution
## ... Procrustes: rmse 0.05550052  max resid 0.1569568 
## Run 4 stress 0.02185097 
## ... Procrustes: rmse 0.01155222  max resid 0.03275711 
## Run 5 stress 0.02360378 
## Run 6 stress 0.02181676 
## ... Procrustes: rmse 0.005035596  max resid 0.01386529 
## Run 7 stress 0.02197213 
## ... Procrustes: rmse 0.03701802  max resid 0.1039729 
## Run 8 stress 0.02176694 
## ... New best solution
## ... Procrustes: rmse 0.004928824  max resid 0.0138589 
## Run 9 stress 0.02202521 
## ... Procrustes: rmse 0.0396489  max resid 0.1124044 
## Run 10 stress 0.02375199 
## Run 11 stress 0.02198844 
## ... Procrustes: rmse 0.03684882  max resid 0.104643 
## Run 12 stress 0.02212597 
## ... Procrustes: rmse 0.05217235  max resid 0.1496327 
## Run 13 stress 0.02399484 
## Run 14 stress 0.02405759 
## Run 15 stress 0.02251671 
## Run 16 stress 0.02442058 
## Run 17 stress 0.02177125 
## ... Procrustes: rmse 0.005619633  max resid 0.01568863 
## Run 18 stress 0.02196946 
## ... Procrustes: rmse 0.03984433  max resid 0.1133911 
## Run 19 stress 0.0231586 
## Run 20 stress 0.02259384 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: no. of iterations >= maxit

## [1] 0.14146056 0.06596584 0.02176694
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2432883 
## Run 1 stress 0.1444243 
## ... New best solution
## ... Procrustes: rmse 0.1433319  max resid 0.4074722 
## Run 2 stress 0.1911551 
## Run 3 stress 0.1986083 
## Run 4 stress 0.1444243 
## ... Procrustes: rmse 1.150888e-06  max resid 2.916415e-06 
## ... Similar to previous best
## Run 5 stress 0.292168 
## Run 6 stress 0.1414606 
## ... New best solution
## ... Procrustes: rmse 0.02249834  max resid 0.05662958 
## Run 7 stress 0.2915288 
## Run 8 stress 0.2593832 
## Run 9 stress 0.2322329 
## Run 10 stress 0.2535363 
## Run 11 stress 0.3076013 
## Run 12 stress 0.2098618 
## Run 13 stress 0.2961442 
## Run 14 stress 0.3030775 
## Run 15 stress 0.1451452 
## Run 16 stress 0.4792608 
## Run 17 stress 0.1420164 
## Run 18 stress 0.290279 
## Run 19 stress 0.2904474 
## Run 20 stress 0.2234517 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: scale factor of the gradient < sfgrmin
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.06870377 
## Run 1 stress 0.06870312 
## ... New best solution
## ... Procrustes: rmse 0.0004465134  max resid 0.001306033 
## ... Similar to previous best
## Run 2 stress 0.09477481 
## Run 3 stress 0.1083853 
## Run 4 stress 0.1083853 
## Run 5 stress 0.1607804 
## Run 6 stress 0.1021777 
## Run 7 stress 0.07990651 
## Run 8 stress 0.1043544 
## Run 9 stress 0.06596676 
## ... New best solution
## ... Procrustes: rmse 0.02612557  max resid 0.06033822 
## Run 10 stress 0.07348395 
## Run 11 stress 0.07990727 
## Run 12 stress 0.1045591 
## Run 13 stress 0.07348373 
## Run 14 stress 0.07990686 
## Run 15 stress 0.06596657 
## ... New best solution
## ... Procrustes: rmse 0.002663905  max resid 0.00802223 
## ... Similar to previous best
## Run 16 stress 0.06596657 
## ... New best solution
## ... Procrustes: rmse 6.033653e-06  max resid 1.788869e-05 
## ... Similar to previous best
## Run 17 stress 0.1570494 
## Run 18 stress 0.07348288 
## Run 19 stress 0.1043544 
## Run 20 stress 0.06596669 
## ... Procrustes: rmse 0.0001388569  max resid 0.0004061346 
## ... Similar to previous best
## *** Best solution repeated 2 times

## [1] 0.14146056 0.06596657
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2432883 
## Run 1 stress 0.2533715 
## Run 2 stress 0.5149356 
## Run 3 stress 0.2576789 
## Run 4 stress 0.2943772 
## Run 5 stress 0.2008392 
## ... New best solution
## ... Procrustes: rmse 0.1852718  max resid 0.3966593 
## Run 6 stress 0.2259715 
## Run 7 stress 0.4019818 
## Run 8 stress 0.4106443 
## Run 9 stress 0.3086709 
## Run 10 stress 0.1745059 
## ... New best solution
## ... Procrustes: rmse 0.1009503  max resid 0.2123004 
## Run 11 stress 0.2515758 
## Run 12 stress 0.2005041 
## Run 13 stress 0.4735085 
## Run 14 stress 0.2767366 
## Run 15 stress 0.1444243 
## ... New best solution
## ... Procrustes: rmse 0.04424901  max resid 0.1330738 
## Run 16 stress 0.2728852 
## Run 17 stress 0.1444243 
## ... New best solution
## ... Procrustes: rmse 1.842742e-07  max resid 4.889771e-07 
## ... Similar to previous best
## Run 18 stress 0.1444243 
## ... Procrustes: rmse 1.002658e-06  max resid 2.108346e-06 
## ... Similar to previous best
## Run 19 stress 0.1639899 
## Run 20 stress 0.2844137 
## *** Best solution repeated 2 times

## [1] 0.1444243
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.06870377 
## Run 1 stress 0.06596635 
## ... New best solution
## ... Procrustes: rmse 0.02622634  max resid 0.06013882 
## Run 2 stress 0.06596725 
## ... Procrustes: rmse 0.0005417882  max resid 0.001633643 
## ... Similar to previous best
## Run 3 stress 0.07990656 
## Run 4 stress 0.1575941 
## Run 5 stress 0.06870363 
## Run 6 stress 0.06870464 
## Run 7 stress 0.06596642 
## ... Procrustes: rmse 0.002336689  max resid 0.007037219 
## ... Similar to previous best
## Run 8 stress 0.0799073 
## Run 9 stress 0.06874223 
## Run 10 stress 0.09477508 
## Run 11 stress 0.06596641 
## ... Procrustes: rmse 3.240541e-05  max resid 8.997533e-05 
## ... Similar to previous best
## Run 12 stress 0.0687033 
## Run 13 stress 0.07348406 
## Run 14 stress 0.07990703 
## Run 15 stress 0.06596706 
## ... Procrustes: rmse 0.00272763  max resid 0.008213281 
## ... Similar to previous best
## Run 16 stress 0.07990725 
## Run 17 stress 0.07990697 
## Run 18 stress 0.06596672 
## ... Procrustes: rmse 0.0002444276  max resid 0.0007368619 
## ... Similar to previous best
## Run 19 stress 0.09258103 
## Run 20 stress 0.0734832 
## *** Best solution repeated 5 times

#### PRINT NMDS RESULTS ####
MMDS <- NMDS_list[[1]]
print(MMDS)
## 
## Call:
## metaMDS(comm = pivoted_data[, -1], k = 2) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     wisconsin(sqrt(pivoted_data[, -1])) 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.07798004 
## Stress type 1, weak ties
## Best solution was repeated 7 times in 20 tries
## The best solution was from try 4 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'wisconsin(sqrt(pivoted_data[, -1]))'
MMDSL <- NMDS_list[[2]]
print(MMDSL)
## 
## Call:
## metaMDS(comm = pivoted_data[, -1], k = 2) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     wisconsin(pivoted_data[, -1]) 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     8.93295e-05 
## Stress type 1, weak ties
## Best solution was not repeated after 20 tries
## The best solution was from try 15 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'wisconsin(pivoted_data[, -1])'
MMDSM <- NMDS_list[[3]]
print(MMDSM)
## 
## Call:
## metaMDS(comm = pivoted_data[, -1], k = 2) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     wisconsin(sqrt(pivoted_data[, -1])) 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.02644227 
## Stress type 1, weak ties
## Best solution was not repeated after 20 tries
## The best solution was from try 5 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'wisconsin(sqrt(pivoted_data[, -1]))'
MMDSS <- NMDS_list[[4]]
print(MMDSS)
## 
## Call:
## metaMDS(comm = pivoted_data[, -1], k = 2) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     wisconsin(sqrt(pivoted_data[, -1])) 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.06596635 
## Stress type 1, weak ties
## Best solution was repeated 5 times in 20 tries
## The best solution was from try 1 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'wisconsin(sqrt(pivoted_data[, -1]))'
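
The perMANOVA fits stored in MANOVARES_list are not echoed by the loop above, so their headline numbers can be pulled out alongside each stress value. A minimal sketch, assuming the list order all / large / medium / small that matches pivoted_data_list (subset_labels is a hypothetical name):

# summarize stored ordinations and perMANOVAs; order follows pivoted_data_list
subset_labels <- c("all", "large", "medium", "small")
for (i in 1:4) {
  cat(subset_labels[i],
      "stress =", round(NMDS_list[[i]]$stress, 4),
      "| perMANOVA p =", MANOVARES_list[[i]]$`Pr(>F)`[1], "\n")
}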

Diversity indices

#### SIMPSON INDEX ####
Singles_pivoted_data # view data including singletons
## # A tibble: 14 × 155
##    habitat_type   HDP   HEU   HLL   KAO   KAS   KDA   KDT   KGC   KIE   KLC
##    <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 established      6     2     1     2     0     1     0     5    29     0
##  2 established      0     3     0     2     0     3     1     6    17     0
##  3 established      0     0     0     2     0     0     0     2    21     1
##  4 established      0     0     0     1     0    11     0     1     5     0
##  5 established      0     0     0     0     8     0     0     1    14     0
##  6 marantaceae      3     0     0     0     0     1     0     0     0     0
##  7 marantaceae      0     0     0     0     0     1     0     0     3     0
##  8 marantaceae      0     0     0     0     0     0     0     0     0     0
##  9 marantaceae      0     0     0     0     0     0     0     0     0     0
## 10 marantaceae      0     0     0     0     0     0     0     0     0     0
## 11 riverine         2     0     0     0     0     0     0     0     8     0
## 12 riverine         1     0     2     0    11     0     0     0     1     0
## 13 riverine         0     0     0     0     1     0     0     0     0     0
## 14 riverine         0     0     0     0     0     0     0     0     0     0
## # ℹ 144 more variables: KOS <dbl>, KPE <dbl>, KPS <dbl>, KUG <dbl>, KXS <dbl>,
## #   OBB <dbl>, OBC <dbl>, OBH <dbl>, OBR <dbl>, OCD <dbl>, OCE <dbl>,
## #   OCL <dbl>, OCP <dbl>, OCZ <dbl>, ODC <dbl>, ODD <dbl>, ODE <dbl>,
## #   ODO <dbl>, ODW <dbl>, ODY <dbl>, OEE <dbl>, OHF <dbl>, OIL <dbl>,
## #   OIT <dbl>, OKA <dbl>, OLL <dbl>, OLP <dbl>, OMA <dbl>, OML <dbl>,
## #   OMT <dbl>, OMU <dbl>, OMY <dbl>, ONA <dbl>, OOA <dbl>, OOO <dbl>,
## #   OPA <dbl>, OPE <dbl>, OPN <dbl>, OPP <dbl>, OPY <dbl>, ORY <dbl>, …
simpson_index <- diversity(Singles_pivoted_data[,2:155], index = "simpson") # Simpson index over all numerical (species count) columns
simpson_index # view
##  [1] 0.7659042 0.8188579 0.8809521 0.8876395 0.7836887 0.8751486 0.7898439
##  [8] 0.5553086 0.4444444 0.5000000 0.7701952 0.8061702 0.7114419 0.9018444
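As a hand check (a sketch, not executed here): vegan's "simpson" option returns the Gini-Simpson index, 1 - sum(p_i^2), which can be reproduced directly for the first plot:

# manual Gini-Simpson for plot 1
counts <- as.numeric(unlist(Singles_pivoted_data[1, 2:155])) # species counts, first plot
p <- counts / sum(counts) # relative abundances
1 - sum(p^2) # should match simpson_index[1] (~0.766)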
# create a new data frame with the Simpson index data
simpson_df <- data.frame(simpson_index)

# add new data frame as a new column
Simpson_Indexes <- cbind(Singles_pivoted_data[1], simpson_df)

# plot
ggplot(data = Simpson_Indexes, aes(x = habitat_type, y = simpson_index, fill = habitat_type)) +
  geom_boxplot(color = "black") +
  scale_fill_manual(values = c('plum3', 'darkseagreen2', 'chartreuse4')) +
  labs(title = "Box Plot of Simpson Index by Habitat Type",
       x = "Habitat Type",
       y = "Simpson Index")

#### SHANNON INDEX ####
Singles_pivoted_data # view data including singletons
## # A tibble: 14 × 155
##    habitat_type   HDP   HEU   HLL   KAO   KAS   KDA   KDT   KGC   KIE   KLC
##    <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 established      6     2     1     2     0     1     0     5    29     0
##  2 established      0     3     0     2     0     3     1     6    17     0
##  3 established      0     0     0     2     0     0     0     2    21     1
##  4 established      0     0     0     1     0    11     0     1     5     0
##  5 established      0     0     0     0     8     0     0     1    14     0
##  6 marantaceae      3     0     0     0     0     1     0     0     0     0
##  7 marantaceae      0     0     0     0     0     1     0     0     3     0
##  8 marantaceae      0     0     0     0     0     0     0     0     0     0
##  9 marantaceae      0     0     0     0     0     0     0     0     0     0
## 10 marantaceae      0     0     0     0     0     0     0     0     0     0
## 11 riverine         2     0     0     0     0     0     0     0     8     0
## 12 riverine         1     0     2     0    11     0     0     0     1     0
## 13 riverine         0     0     0     0     1     0     0     0     0     0
## 14 riverine         0     0     0     0     0     0     0     0     0     0
## # ℹ 144 more variables: KOS <dbl>, KPE <dbl>, KPS <dbl>, KUG <dbl>, KXS <dbl>,
## #   OBB <dbl>, OBC <dbl>, OBH <dbl>, OBR <dbl>, OCD <dbl>, OCE <dbl>,
## #   OCL <dbl>, OCP <dbl>, OCZ <dbl>, ODC <dbl>, ODD <dbl>, ODE <dbl>,
## #   ODO <dbl>, ODW <dbl>, ODY <dbl>, OEE <dbl>, OHF <dbl>, OIL <dbl>,
## #   OIT <dbl>, OKA <dbl>, OLL <dbl>, OLP <dbl>, OMA <dbl>, OML <dbl>,
## #   OMT <dbl>, OMU <dbl>, OMY <dbl>, ONA <dbl>, OOA <dbl>, OOO <dbl>,
## #   OPA <dbl>, OPE <dbl>, OPN <dbl>, OPP <dbl>, OPY <dbl>, ORY <dbl>, …
shannon_index <- diversity(Singles_pivoted_data[,2:155], index = "shannon") # Shannon index over all numerical (species count) columns
shannon_index # view
##  [1] 2.1687089 2.3953766 2.6167537 2.7484724 2.1668122 2.3029725 2.0961954
##  [8] 1.2115170 0.6365142 0.8675632 2.0959856 2.1820347 1.7718222 2.7729609
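The same kind of hand check works for Shannon's H' = -sum(p_i * log(p_i)); a sketch for the first plot:

# manual Shannon H' for plot 1
counts <- as.numeric(unlist(Singles_pivoted_data[1, 2:155])) # species counts, first plot
p <- counts[counts > 0] / sum(counts) # relative abundances of species present
-sum(p * log(p)) # should match shannon_index[1] (~2.169)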
# new data frame with the Shannon index data
shannon_df <- data.frame(shannon_index)

# add new data frame as a new column
Shannon_Indexes <- cbind(Singles_pivoted_data[1], shannon_df)

# Set the file path where you want to save the PDF
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/shannon.pdf"
# Open a PDF device
#pdf(pdf_path)

# plot
ggplot(data = Shannon_Indexes, aes(x = habitat_type, y = shannon_index, fill = habitat_type)) +
  geom_boxplot(color = "black") +
  scale_fill_manual(values = c('plum3', 'darkseagreen2', 'chartreuse4')) +
  labs(title = "Box Plot of Shannon Index by Habitat Type",
       x = "Habitat Type",
       y = "Shannon Index")

#dev.off() #for saving

#### AUTOCORRELATION ####
# no significant spatial autocorrelation for the Simpson index (all p > 0.05)
established <- Simpson_Indexes[1:5,2]
riverine <- Simpson_Indexes[11:14,2]
marantaceae <- Simpson_Indexes[6:10,2]
Moran.I(riverine, RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3671881
## 
## $expected
## [1] -0.3333333
## 
## $sd
## [1] 0.4176077
## 
## $p.value
## [1] 0.9353877
Moran.I(marantaceae, MAR.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3346759
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.2153944
## 
## $p.value
## [1] 0.6942307
Moran.I(established, DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3293823
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.5419306
## 
## $p.value
## [1] 0.883542
# no significant spatial autocorrelation for the Shannon index (all p > 0.05)
established <- Shannon_Indexes[1:5,2]
riverine <- Shannon_Indexes[11:14,2]
marantaceae <- Shannon_Indexes[6:10,2]
Moran.I(riverine, RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.2288976
## 
## $expected
## [1] -0.3333333
## 
## $sd
## [1] 0.3476527
## 
## $p.value
## [1] 0.7638702
Moran.I(marantaceae, MAR.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3581802
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.219035
## 
## $p.value
## [1] 0.6213805
Moran.I(established, DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.5202688
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.5279465
## 
## $p.value
## [1] 0.6087038
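The weight matrices RIV.dists.inv, MAR.dists.inv, and DEN.dists.inv were built earlier in this document. For readers landing here, a minimal sketch of how an inverse-distance weight matrix of this kind is typically constructed (the coordinates below are hypothetical placeholders, not the real plot locations):

riv_xy <- cbind(x = c(0, 40, 90, 150), y = c(0, 20, 10, 60)) # hypothetical riverine plot coordinates
d <- as.matrix(dist(riv_xy)) # pairwise Euclidean distances
w <- 1 / d # inverse-distance weights
diag(w) <- 0 # ape's Moran.I expects zero self-weights
# Moran.I(riverine, w, scaled = TRUE) # as used above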
#### ANOVA ASSUMPTIONS ####
# established
shapiro_test_resultEST <- shapiro.test(established)
# display the result
print(shapiro_test_resultEST)
## 
##  Shapiro-Wilk normality test
## 
## data:  established
## W = 0.89179, p-value = 0.3662
# riv
shapiro_test_resultRIV <- shapiro.test(riverine)
# display the result
print(shapiro_test_resultRIV)
## 
##  Shapiro-Wilk normality test
## 
## data:  riverine
## W = 0.94389, p-value = 0.6782
# mar
shapiro_test_resultMAR <- shapiro.test(marantaceae)
# display the result
print(shapiro_test_resultMAR)
## 
##  Shapiro-Wilk normality test
## 
## data:  marantaceae
## W = 0.89645, p-value = 0.3906
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(shannon_index ~ habitat_type, data = Shannon_Indexes)
# display the result
print(bartlett_test_result)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  shannon_index by habitat_type
## Bartlett's K-squared = 3.6012, df = 2, p-value = 0.1652
#### KW TEST ####
kw_result <- kruskal.test(simpson_index ~ habitat_type, data = Simpson_Indexes)
# display test results
print(kw_result)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  simpson_index by habitat_type
## Kruskal-Wallis chi-squared = 2.9457, df = 2, p-value = 0.2293
# post hoc with Dunn's 
dunn_result <- dunn.test(Simpson_Indexes$simpson_index, g = Simpson_Indexes$habitat_type, method = "bonferroni")
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 2.9457, df = 2, p-value = 0.23
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |   establis   marantac
## ---------+----------------------
## marantac |   1.663043
##          |     0.1445
##          |
## riverine |   0.409800  -1.158132
##          |     1.0000     0.3702
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
# results
print(dunn_result)
## $chi2
## [1] 2.945714
## 
## $Z
## [1]  1.6630437  0.4098006 -1.1581320
## 
## $P
## [1] 0.04815185 0.34097612 0.12340508
## 
## $P.adjusted
## [1] 0.1444555 1.0000000 0.3702152
## 
## $comparisons
## [1] "established - marantaceae" "established - riverine"   
## [3] "marantaceae - riverine"
# KW test
kw_result <- kruskal.test(shannon_index ~ habitat_type, data = Shannon_Indexes)
# display test results
print(kw_result)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  shannon_index by habitat_type
## Kruskal-Wallis chi-squared = 4.8857, df = 2, p-value = 0.08691
# post hoc with Dunn's 
dunn_result <- dunn.test(Shannon_Indexes$shannon_index, g = Shannon_Indexes$habitat_type, method = "bonferroni")
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 4.8857, df = 2, p-value = 0.09
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |   establis   marantac
## ---------+----------------------
## marantac |   2.192193
##          |     0.0425
##          |
## riverine |   0.783966  -1.282853
##          |     0.6496     0.2993
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
# display
print(dunn_result)
## $chi2
## [1] 4.885714
## 
## $Z
## [1]  2.1921939  0.7839663 -1.2828540
## 
## $P
## [1] 0.01418275 0.21652994 0.09977162
## 
## $P.adjusted
## [1] 0.04254826 0.64958982 0.29931487
## 
## $comparisons
## [1] "established - marantaceae" "established - riverine"   
## [3] "marantaceae - riverine"

DBH

#### GET DATA ####
# habitats
selected_habitats <- c("marantaceae", "riverine", "established")
# subset based on the habitats
subset_DBH <- subset(DBH, Type %in% selected_habitats)


#### HISTOGRAMS OF DATA ####
# color scheme
habitat_colors <- c('plum3', 'darkseagreen2', 'chartreuse4')

## standardized x-axis but free y-axis
# histograms of DBH for each plot, color coded by habitat
ggplot(subset_DBH, aes(x = DBH, fill = Type)) +
  geom_histogram(binwidth = 5, position = "identity", alpha = 0.7) +
  facet_wrap(~Plot, scales = "free_y") +
  labs(title = "Histograms of DBH by Habitat",
       x = "DBH",
       y = "Frequency") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::comma) +
  coord_cartesian(xlim = c(0, 100)) +
  scale_fill_manual(values = habitat_colors)

## standardized x- and y-axes
# histograms of DBH for each plot, color coded by habitat
ggplot(subset_DBH, aes(x = DBH, fill = Type)) +
  geom_histogram(binwidth = 5, position = "identity", alpha = 0.7) +
  facet_wrap(~Plot, scales = "free_y") +
  labs(title = "Histograms of DBH by Habitat",
       x = "DBH",
       y = "Frequency") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::comma, limits = c(0, 30)) +
  coord_cartesian(xlim = c(0, 100)) +
  scale_fill_manual(values = habitat_colors)
## Warning: Removed 1 rows containing missing values (`geom_bar()`).

## combined histograms
# 3 histograms of DBH for each habitat type
# file path to save
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/DBHHists.pdf"
#pdf(pdf_path)

ggplot(subset_DBH, aes(x = DBH, fill = Type)) +
  geom_histogram(binwidth = 5, position = "identity", alpha = 0.7) +
  facet_grid(Type ~ ., scales = "free_y", space = "free_y") +
  labs(title = "Histograms of DBH by Habitat",
       x = "DBH",
       y = "Frequency") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(labels = scales::comma) +
  coord_cartesian(xlim = c(0, 100), ylim = c(0, 30)) +
  scale_fill_manual(values = habitat_colors)

#dev.off() #for saving



#### KS TESTS BETWEEN HABITATS ####
# for each pair of habitat type distributions
habitat_types <- unique(subset_DBH$Type)
ks_test_results <- matrix(NA, nrow = length(habitat_types), ncol = length(habitat_types), dimnames = list(habitat_types, habitat_types))
ks_test_resultsSTATS <- matrix(NA, nrow = length(habitat_types), ncol = length(habitat_types), dimnames = list(habitat_types, habitat_types))

for (i in 1:(length(habitat_types) - 1)) {
  for (j in (i + 1):length(habitat_types)) {
    ks_test_results[i, j] <- ks.test(subset_DBH$DBH[subset_DBH$Type == habitat_types[i]],subset_DBH$DBH[subset_DBH$Type == habitat_types[j]])$p.value
    ks_test_results[j, i] <- ks_test_results[i, j]
    
    ks_test_resultsSTATS[i, j] <- ks.test(subset_DBH$DBH[subset_DBH$Type == habitat_types[i]],subset_DBH$DBH[subset_DBH$Type == habitat_types[j]])$statistic
    ks_test_resultsSTATS[j, i] <- ks_test_resultsSTATS[i, j]
  }
}
## Warning in ks.test.default(subset_DBH$DBH[subset_DBH$Type == habitat_types[i]],
## : p-value will be approximate in the presence of ties
## Warning in ks.test.default(subset_DBH$DBH[subset_DBH$Type == habitat_types[i]],
## : p-value will be approximate in the presence of ties
# print the results
print(ks_test_results)
##                 riverine  established  marantaceae
## riverine              NA 0.0064858967 2.205816e-06
## established 6.485897e-03           NA 5.583663e-04
## marantaceae 2.205816e-06 0.0005583663           NA
# all p-values < 0.05: the DBH distributions differ significantly between habitats
# print the test statistic results
print(ks_test_resultsSTATS)
##              riverine established marantaceae
## riverine           NA   0.1888528   0.5364286
## established 0.1888528          NA   0.4202597
## marantaceae 0.5364286   0.4202597          NA
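The three pairwise KS p-values could also be adjusted for multiple comparisons, mirroring the Benjamini-Hochberg step applied to the CHM comparisons later in this document; a minimal sketch:

pairwise_p <- ks_test_results[upper.tri(ks_test_results)] # the three unique p-values
p.adjust(pairwise_p, method = "BH") # Benjamini-Hochberg adjustment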
#### KS TESTS WITHIN HABITATS #### 
## riverine
riverine_plot_ids <- unique(subset_DBH$Plot[subset_DBH$Type == "riverine"])
ks_within_plot_results <- matrix(NA, nrow = length(riverine_plot_ids), ncol = length(riverine_plot_ids), dimnames = list(riverine_plot_ids, riverine_plot_ids))

ks_within_plot_resultsSTATS <- matrix(NA, nrow = length(riverine_plot_ids), ncol = length(riverine_plot_ids), dimnames = list(riverine_plot_ids, riverine_plot_ids))

for (i in 1:(length(riverine_plot_ids) - 1)) {
  for (j in (i + 1):length(riverine_plot_ids)) {
    subset_i <- subset_DBH$DBH[subset_DBH$Type == "riverine" & subset_DBH$Plot == riverine_plot_ids[i]]
    subset_j <- subset_DBH$DBH[subset_DBH$Type == "riverine" & subset_DBH$Plot == riverine_plot_ids[j]]
    
    # data in both subsets?
    if (length(subset_i) > 1 && length(subset_j) > 1) {
      ks_within_plot_results[i, j] <- ks.test(subset_i, subset_j)$p.value
      ks_within_plot_results[j, i] <- ks_within_plot_results[i, j]
      
      ks_within_plot_resultsSTATS[i, j] <- ks.test(subset_i, subset_j)$statistic
      ks_within_plot_resultsSTATS[j, i] <- ks_within_plot_resultsSTATS[i, j]
    } else { # too few observations: leave results as NA
      ks_within_plot_results[i, j] <- NA
      ks_within_plot_results[j, i] <- NA
      ks_within_plot_resultsSTATS[i, j] <- NA
      ks_within_plot_resultsSTATS[j, i] <- NA
    }
  }
}

# print results
print("KS test within plots for riverine:")
## [1] "KS test within plots for riverine:"
print(ks_within_plot_results)
##               R5        R20           R4         R30
## R5            NA 0.14554533 0.0007334696 0.749510182
## R20 0.1455453330         NA 0.0740297605 0.098220073
## R4  0.0007334696 0.07402976           NA 0.008129542
## R30 0.7495101823 0.09822007 0.0081295418          NA
print(ks_within_plot_resultsSTATS)
##            R5       R20        R4       R30
## R5         NA 0.2428571 0.3784083 0.1459627
## R20 0.2428571        NA 0.2610169 0.3000000
## R4  0.3784083 0.2610169        NA 0.3644068
## R30 0.1459627 0.3000000 0.3644068        NA
## marantaceae
marantaceae_plot_ids <- unique(subset_DBH$Plot[subset_DBH$Type == "marantaceae"])
ks_within_plot_results <- matrix(NA, nrow = length(marantaceae_plot_ids), ncol = length(marantaceae_plot_ids), dimnames = list(marantaceae_plot_ids, marantaceae_plot_ids))
ks_within_plot_resultsSTATS <- matrix(NA, nrow = length(marantaceae_plot_ids), ncol = length(marantaceae_plot_ids), dimnames = list(marantaceae_plot_ids, marantaceae_plot_ids))

for (i in 1:(length(marantaceae_plot_ids) - 1)) {
  for (j in (i + 1):length(marantaceae_plot_ids)) {
    subset_i <- subset_DBH$DBH[subset_DBH$Type == "marantaceae" & subset_DBH$Plot == marantaceae_plot_ids[i]]
    subset_j <- subset_DBH$DBH[subset_DBH$Type == "marantaceae" & subset_DBH$Plot == marantaceae_plot_ids[j]]
    ks_within_plot_results[i, j] <- ks.test(subset_i, subset_j)$p.value
    ks_within_plot_results[j, i] <- ks_within_plot_results[i, j]
    ks_within_plot_resultsSTATS[i, j] <- ks.test(subset_i, subset_j)$statistic
    ks_within_plot_resultsSTATS[j, i] <- ks_within_plot_resultsSTATS[i, j]
  }
}

# print results
print("KS test within plots for marantaceae:")
## [1] "KS test within plots for marantaceae:"
print(ks_within_plot_results)
##           M2        M4        M1        M3        M5
## M2        NA 1.0000000 0.5714286 0.7575758 0.7500000
## M4 1.0000000        NA 0.3333333 0.1818182 0.5000000
## M1 0.5714286 0.3333333        NA 0.1658342 0.1666667
## M3 0.7575758 0.1818182 0.1658342        NA 0.5602118
## M5 0.7500000 0.5000000 0.1666667 0.5602118        NA
print(ks_within_plot_resultsSTATS)
##     M2        M4  M1        M3        M5
## M2  NA 0.5000000 0.6 0.5000000 0.5000000
## M4 0.5        NA 1.0 1.0000000 0.8571429
## M1 0.6 1.0000000  NA 0.6000000 0.6000000
## M3 0.5 1.0000000 0.6        NA 0.3571429
## M5 0.5 0.8571429 0.6 0.3571429        NA
## established/dense
established_plot_ids <- unique(subset_DBH$Plot[subset_DBH$Type == "established"])
ks_within_plot_results <- matrix(NA, nrow = length(established_plot_ids), ncol = length(established_plot_ids), dimnames = list(established_plot_ids, established_plot_ids))
ks_within_plot_resultsSTATS <- matrix(NA, nrow = length(established_plot_ids), ncol = length(established_plot_ids), dimnames = list(established_plot_ids, established_plot_ids))

for (i in 1:(length(established_plot_ids) - 1)) {
  for (j in (i + 1):length(established_plot_ids)) {
    subset_i <- subset_DBH$DBH[subset_DBH$Type == "established" & subset_DBH$Plot == established_plot_ids[i]]
    subset_j <- subset_DBH$DBH[subset_DBH$Type == "established" & subset_DBH$Plot == established_plot_ids[j]]
    
    # enough data in both subsets?
    if (length(subset_i) > 1 && length(subset_j) > 1) {
      ks_within_plot_results[i, j] <- ks.test(subset_i, subset_j)$p.value
      ks_within_plot_results[j, i] <- ks_within_plot_results[i, j]
      # get test statistic
      ks_within_plot_resultsSTATS[i, j] <- ks.test(subset_i, subset_j)$statistic
      ks_within_plot_resultsSTATS[j, i] <- ks_within_plot_resultsSTATS[i, j]
    } else { # too few observations: leave results as NA
      ks_within_plot_results[i, j] <- NA
      ks_within_plot_results[j, i] <- NA
      ks_within_plot_resultsSTATS[i, j] <- NA
      ks_within_plot_resultsSTATS[j, i] <- NA
    }
  }
}

# results
print("KS test within plots for established:")
## [1] "KS test within plots for established:"
print(ks_within_plot_results)
##             F110        F200      F300      F330       F100
## F110          NA 0.005039835 0.1404444 0.0104236 0.08354686
## F200 0.005039835          NA 0.1687311 0.9516712 0.68068988
## F300 0.140444431 0.168731140        NA 0.3601492 0.23567284
## F330 0.010423597 0.951671239 0.3601492        NA 0.72178240
## F100 0.083546859 0.680689882 0.2356728 0.7217824         NA
print(ks_within_plot_resultsSTATS)
##           F110      F200      F300      F330      F100
## F110        NA 0.4028595 0.3596059 0.3876980 0.3040752
## F200 0.4028595        NA 0.3257840 0.1048121 0.1552106
## F300 0.3596059 0.3257840        NA 0.2722008 0.3095238
## F330 0.3876980 0.1048121 0.2722008        NA 0.1531532
## F100 0.3040752 0.1552106 0.3095238 0.1531532        NA
# Create a custom color palette for habitat types
habitat_colors <- c('plum3', 'darkseagreen2', 'chartreuse4')

#### 12-BOX BOXPLOT ####
# box plot for each plot by habitat
#ggplot(subset_DBH, aes(x = factor(Plot), y = DBH, fill = Type)) +
 # geom_boxplot() +
  #facet_wrap(~Type, scales = "free") +
  #labs(title = "Box Plots of DBH by Plot and Habitat",
   #    x = "Plot",
    #   y = "DBH") +
#  scale_y_continuous(limits = c(0, 120)) + # standardized y-axis limits
#  scale_fill_manual(values = habitat_colors) +
#  theme_minimal()

# Set the file path where you want to save the PDF
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/DBHBox.pdf"
# Open a PDF device
#pdf(pdf_path)

# 1 box plot per habitat type
#ggplot(subset_DBH, aes(x = factor(Type), y = DBH, fill = Type)) +
 # geom_boxplot() +
  #labs(title = "Box Plots of DBH by Habitat",
   #    x = "Habitat Type",
    #   y = "DBH") +
#  scale_y_continuous(limits = c(0, 120)) + # standardized y-axis limits
#  scale_fill_manual(values = habitat_colors) +
#  theme_minimal()

#dev.off() #for saving



#### KRUSKAL WALLIS TEST ####
kruskal_test_result <- kruskal.test(DBH ~ Type, data = subset_DBH)
# print results
print("Kruskal-Wallis Test:")
## [1] "Kruskal-Wallis Test:"
print(kruskal_test_result)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  DBH by Type
## Kruskal-Wallis chi-squared = 29.83, df = 2, p-value = 3.33e-07
# Post-hoc Dunn's test with Bonferroni correction
posthoc_dunn <- dunn.test(subset_DBH$DBH, subset_DBH$Type, method = "bonferroni")
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 29.83, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |   establis   marantac
## ---------+----------------------
## marantac |  -3.381617
##          |    0.0011*
##          |
## riverine |   3.216846   5.075600
##          |    0.0019*    0.0000*
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
# Print the post-hoc test result
print("Post-hoc Dunn's Test:")
## [1] "Post-hoc Dunn's Test:"
print(posthoc_dunn)
## $chi2
## [1] 29.83004
## 
## $Z
## [1] -3.381618  3.216846  5.075600
## 
## $P
## [1] 3.603017e-04 6.480404e-04 1.931374e-07
## 
## $P.adjusted
## [1] 1.080905e-03 1.944121e-03 5.794121e-07
## 
## $comparisons
## [1] "established - marantaceae" "established - riverine"   
## [3] "marantaceae - riverine"
# autocorrelation check: no significant spatial autocorrelation in mean DBH (Moran's I below)
# Calculate mean values for each combination of Plot and Type
mean_values <- subset_DBH %>%
  group_by(Plot, Type) %>%
  summarize(mean_DBH = mean(DBH, na.rm = TRUE))
## `summarise()` has grouped output by 'Plot'. You can override using the
## `.groups` argument.
established <- mean_values[1:5,3]
established <- established$mean_DBH # to a vector
riverine <- mean_values[11:14,3]
riverine <- riverine$mean_DBH # to a vector
marantaceae <- mean_values[6:10,3]
marantaceae <- marantaceae$mean_DBH # to a vector
Moran.I(riverine, RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.3518021
## 
## $expected
## [1] -0.3333333
## 
## $sd
## [1] 0.2301075
## 
## $p.value
## [1] 0.9360292
Moran.I(marantaceae, MAR.dists.inv, scaled = TRUE)
## $observed
## [1] -0.06408652
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.1477366
## 
## $p.value
## [1] 0.2082429
Moran.I(established, DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.1295925
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.2097542
## 
## $p.value
## [1] 0.5659402
#### ANOVA ASSUMPTIONS ####
# established
shapiro_test_resultEST <- shapiro.test(established)
# display the result
print(shapiro_test_resultEST)
## 
##  Shapiro-Wilk normality test
## 
## data:  established
## W = 0.75547, p-value = 0.03342
# riv
shapiro_test_resultRIV <- shapiro.test(riverine)
# display the result
print(shapiro_test_resultRIV)
## 
##  Shapiro-Wilk normality test
## 
## data:  riverine
## W = 0.82671, p-value = 0.1594
# mar
shapiro_test_resultMAR <- shapiro.test(marantaceae)
# display the result
print(shapiro_test_resultMAR)
## 
##  Shapiro-Wilk normality test
## 
## data:  marantaceae
## W = 0.94563, p-value = 0.706
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(mean_DBH ~ Type, data = mean_values)
# display the result
print(bartlett_test_result)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  mean_DBH by Type
## Bartlett's K-squared = 12.072, df = 2, p-value = 0.002391

Stem density

#### DBH-ABLE TREES STEM DENSITIES ####
# calculate stem density per plot by counting the number of rows for each plot in each habitat type
stem_density <- subset_DBH %>%
  group_by(Plot, Type) %>%
  summarise(stem_count = n())
## `summarise()` has grouped output by 'Plot'. You can override using the
## `.groups` argument.
# print dataframe result
print(stem_density)
## # A tibble: 14 × 3
## # Groups:   Plot [14]
##    Plot  Type        stem_count
##    <chr> <chr>            <int>
##  1 F100  established         33
##  2 F110  established         29
##  3 F200  established         41
##  4 F300  established         14
##  5 F330  established         37
##  6 M1    marantaceae          5
##  7 M2    marantaceae          2
##  8 M3    marantaceae         10
##  9 M4    marantaceae          1
## 10 M5    marantaceae          7
## 11 R20   riverine            35
## 12 R30   riverine            28
## 13 R4    riverine            59
## 14 R5    riverine            46
# plot stem densities -- calculate average stem density per plot by habitat
average_density <- stem_density %>%
  group_by(Type) %>%
  summarise(avg_density = mean(stem_count))

average_density # view
## # A tibble: 3 × 2
##   Type        avg_density
##   <chr>             <dbl>
## 1 established        30.8
## 2 marantaceae         5  
## 3 riverine           42
# to save the PDF
pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/stemD.pdf"
pdf(pdf_path)
# plot average stem density per plot by habitat as a box plot
ggplot(stem_density, aes(x = Type, y = stem_count, fill = Type)) +
  geom_boxplot() +
  labs(title = "Average Stem Density per Plot by Habitat (Box Plot)",
       x = "Habitat Type",
       y = "Stem Density",
       fill = "Habitat Type") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(values = habitat_colors)

dev.off()
## quartz_off_screen 
##                 2
#### ANOVA ASSUMPTIONS ####
marantaceae <- stem_density$stem_count[stem_density$Type == "marantaceae"]
riverine <- stem_density$stem_count[stem_density$Type == "riverine"]
established <- stem_density$stem_count[stem_density$Type == "established"]
# mar
shapiro_test_resultMAR <- shapiro.test(marantaceae)
# display the result
print(shapiro_test_resultMAR)
## 
##  Shapiro-Wilk normality test
## 
## data:  marantaceae
## W = 0.95695, p-value = 0.7866
shapiro_test_resultRIV <- shapiro.test(riverine)
# display the result
print(shapiro_test_resultRIV)
## 
##  Shapiro-Wilk normality test
## 
## data:  riverine
## W = 0.97313, p-value = 0.8608
shapiro_test_resultEST <- shapiro.test(established)
# display the result
print(shapiro_test_resultEST)
## 
##  Shapiro-Wilk normality test
## 
## data:  established
## W = 0.9128, p-value = 0.4846
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(stem_count ~ Type, data = stem_density)
# display the result
print(bartlett_test_result)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  stem_count by Type
## Bartlett's K-squared = 4.65, df = 2, p-value = 0.09778
#### ANOVA ####
# ANOVA
anova_result <- aov(stem_count ~ Type, data = stem_density)

# Print ANOVA result
print("ANOVA:")
## [1] "ANOVA:"
print(summary(anova_result))
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Type         2   3324  1661.8   17.63 0.000371 ***
## Residuals   11   1037    94.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Post-hoc Tukey test
posthoc_tukey <- TukeyHSD(anova_result)

# Print post-hoc test result
print("Post-hoc Tukey Test:")
## [1] "Post-hoc Tukey Test:"
print(posthoc_tukey)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = stem_count ~ Type, data = stem_density)
## 
## $Type
##                          diff        lwr       upr     p adj
## marantaceae-established -25.8 -42.383748 -9.216252 0.0038769
## riverine-established     11.2  -6.389721 28.789721 0.2414447
## riverine-marantaceae     37.0  19.410279 54.589721 0.0003827
# autocorrelation check: marantaceae shows significant spatial autocorrelation (p < 0.05); riverine and established do not
established <- stem_density[1:5,3]
established <- established$stem_count # to a vector
riverine <- stem_density[11:14,3]
riverine <- riverine$stem_count # to a vector
marantaceae <- stem_density[6:10,3]
marantaceae <- marantaceae$stem_count # to a vector
Moran.I(riverine, RIV.dists.inv, scaled = TRUE)
## $observed
## [1] -0.8200796
## 
## $expected
## [1] -0.3333333
## 
## $sd
## [1] 0.5291706
## 
## $p.value
## [1] 0.3576623
Moran.I(marantaceae, MAR.dists.inv, scaled = TRUE) 
## $observed
## [1] 0.1417477
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.1986743
## 
## $p.value
## [1] 0.04863142
Moran.I(established, DEN.dists.inv, scaled = TRUE)
## $observed
## [1] -0.5565877
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.3574661
## 
## $p.value
## [1] 0.391075

APPLY WD VAR TO DBH !DO NOT RUN!

#SDBH <- subset_DBH 
#SDBH$WD <- NA # initialize WD column 
#SDBH$var <- NA # initialize variance column
#SDBH$WD_ceil <- NA  # initialize WD ceiling column 
#SDBH$WD_floor <- NA  # initialize WD floor column 

#for (i in 1:nrow(SDBH)) { # loop through small trees rows
 # hab_class <- SDBH[i,2] 
  #  RowCode <- SDBH[i,3] # get species code
   # SPoi <- RowCode$Sp # get sp subset
    #row_placement <- which(SPDWDV$Code == SPoi[1]) # find its matching row in SPDWDV same for SPDWD
    #VVal <- SPDWDV$v[row_placement] # get the v value at the row number
    #SDBH$var[i] <- VVal # add it in the PS dataframe
  #  awd <- SPDWD$AVG_WOOD_D[row_placement]
   # SDBH$WD[i] <- awd
   # SDBH$WD_ceil[i] <- awd + ((1-VVal)*awd)
    #SDBH$WD_floor[i] <- awd - ((1-VVal)*awd)
#}
#write.csv(SDBH, file = "SDBH.csv", row.names = FALSE)
# resulting in output: SDBH
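The ceiling/floor arithmetic above is easy to verify on a toy value (both numbers below are made up for illustration):

awd <- 0.60 # hypothetical average wood density
VVal <- 0.80 # hypothetical variance score v
awd + ((1 - VVal) * awd) # WD ceiling: 0.72
awd - ((1 - VVal) * awd) # WD floor: 0.48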

Carbon from plots

#### TO ASSIGN WOOD DENSITIES ####
# this code occasionally stops and throws an error; when it runs cleanly the numbers are correct, so check the resulting tables and rerun any sections that produce NAs
#subset_DBH$WD <- NA
#for (i in 1:nrow(subset_DBH)) {
 # DBHsp <- subset_DBH$Sp[i]
  #matching_indices <- which(SPDWD$Code == DBHsp)
  
  # Check if there are matching indices
  #if (length(matching_indices) > 0) {
   # w <- SPDWD$AVG_WOOD_D[matching_indices[1]]  # Assuming unique matches
    
    # Check if w is not NA before assignment
    #if (!is.na(w)) {
     # subset_DBH$WD[i] <- w
    #}
  #}
#}


#### SPLIT DATA ####
# separate by plot
plot_types <- unique(subD$Plot)

# list of data frames for each plot type
split_data <- lapply(plot_types, function(plot_type) {
  subset_DBH_subset <- subset(subD, Plot == plot_type)
  return(subset_DBH_subset)
})


#### RUN ALLOMETRY ####
# per-tree allometry: AGB in kg within each 20 m x 20 m plot area

C <- list() # initialize
mat <- matrix(NA, nrow = length(split_data), ncol = 2) # create mat to fill

for (i in 1:length(split_data)) { # loop through split data
plot <- split_data[[i]] # subset based on loop
plot$AGB <- NA # initialize AGB column
# allometry: AGB = p * exp(-1.499 + 2.148*ln(D) + 0.207*ln(D)^2 - 0.0281*ln(D)^3), where p = wood density and D = DBH
plot$AGB <- with(plot, WD * exp(-1.499 + 2.148 * log(DBH) + 0.207 * log(DBH)^2 - 0.0281 * log(DBH)^3)) # apply allometry

C[[i]] <- (plot) # get plot information
mat[i,1] <- (plot$Plot[1]) # name
mat[i,2] <- (sum(plot$AGB)) # sum of all individual trees
}
mat[, 2] <- as.numeric(mat[, 2]) # attempt numeric conversion (a character matrix stays character here; the real conversion happens after as.data.frame below)

# split rows by habitat type (R = riverine, F = established, M = marantaceae)
R_mat <- mat[c(1, 2, 8, 9), ] 
F_mat <- mat[c(3, 4, 5, 6, 7), ]
M_mat <- mat[c(10, 11, 12, 13, 14), ]

# Convert character vectors to numeric just in case
R_mat <- as.numeric(R_mat)
## Warning: NAs introduced by coercion
M_mat <- as.numeric(M_mat)
## Warning: NAs introduced by coercion
F_mat <- as.numeric(F_mat)
## Warning: NAs introduced by coercion
# Combine values into a list
data_list <- list(R_mat, M_mat, F_mat)

# to save the PDF
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/C.pdf"
#pdf(pdf_path)



#### PLOT ####
# box plots side by side
boxplot(data_list, col = c('plum3', 'darkseagreen2', 'chartreuse4'), names = c("R_mat", "M_mat", "F_mat"),
        main = "Box Plots", ylab = "Values") # sum per plot 

# Convert to Mg/ha
mat <- as.data.frame(mat)
mat[,2] <- as.numeric(as.character(mat[,2]))
mat$CMgHa <- mat[,2] * 0.001 / 0.04 # kg -> Mg (x 0.001), per 20 m x 20 m plot (0.04 ha) -> per ha
mat$CMgHa
##  [1] 303.71403 237.33662 172.32469 649.53696 762.72509 816.92236 485.75671
##  [8] 527.86620 208.24346 111.66959 103.58240  65.07394 274.46884 239.75876
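As a sanity check of the allometry and the unit conversion, the calculation for one hypothetical tree (DBH = 30 cm, WD = 0.6; a sketch, not part of the analysis):

agb_kg <- 0.6 * exp(-1.499 + 2.148 * log(30) + 0.207 * log(30)^2 - 0.0281 * log(30)^3) # AGB of one tree, kg
agb_kg * 0.001 / 0.04 # kg -> Mg (x 0.001), then per 0.04-ha plot -> per ha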
#### ANOVA ####
# grouping variable: habitat type for each row of mat
group <- c("riverine","riverine","established","established","established","established","established","riverine","riverine","marantaceae","marantaceae","marantaceae","marantaceae","marantaceae")

mat$GV <- group

#  ANOVA
anova_result <- aov(CMgHa ~ GV, data = mat)

# Run Tukey post-hoc test
tukey_result <- TukeyHSD(anova_result)
summary(anova_result)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## GV           2 444774  222387    6.68 0.0126 *
## Residuals   11 366187   33290                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Display detailed results
summary(tukey_result)
##    Length Class  Mode   
## GV 12     -none- numeric
tukey_result$`GV`
##                              diff       lwr        upr     p adj
## marantaceae-established -418.5425 -730.2066 -106.87832 0.0102023
## riverine-established    -258.1631 -588.7328   72.40665 0.1331967
## riverine-marantaceae     160.3794 -170.1904  490.94911 0.4187101
# only marantaceae vs. established (dense) differs significantly



#### ANOVA ASSUMPTIONS ####
# shapiro
# established
shapiro_test_resultEST <- shapiro.test(mat[3:7,3])
# display the result
print(shapiro_test_resultEST)
## 
##  Shapiro-Wilk normality test
## 
## data:  mat[3:7, 3]
## W = 0.91051, p-value = 0.4707
# mar
shapiro_test_resultMAR <- shapiro.test(mat[10:14,3])
# display the result
print(shapiro_test_resultMAR)
## 
##  Shapiro-Wilk normality test
## 
## data:  mat[10:14, 3]
## W = 0.87082, p-value = 0.2697
# riv
shapiro_test_resultRIV <- shapiro.test(mat[c(1,2,8,9),3])
# display the result
print(shapiro_test_resultRIV)
## 
##  Shapiro-Wilk normality test
## 
## data:  mat[c(1, 2, 8, 9), 3]
## W = 0.84761, p-value = 0.2185
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(CMgHa ~ GV, data = mat)
# display the result
print(bartlett_test_result)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  CMgHa by GV
## Bartlett's K-squared = 3.5911, df = 2, p-value = 0.166
mean(mat[10:14,2])
## [1] 6356.428
mean(mat[c(1,2,8,9),2])
## [1] 12771.6
mean(mat[3:7,2])
## [1] 23098.13
#### AUTOCORRELATION ####
Moran.I(mat[10:14,2], MAR.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] -0.4297229
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.2175661
## 
## $p.value
## [1] 0.4087694
Moran.I(mat[c(1,2,8,9),2], RIV.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] -0.4470218
## 
## $expected
## [1] -0.3333333
## 
## $sd
## [1] 0.2952488
## 
## $p.value
## [1] 0.700193
Moran.I(mat[3:7,2], DEN.dists.inv, scaled = TRUE) # run Moran's I
## $observed
## [1] -0.3562061
## 
## $expected
## [1] -0.25
## 
## $sd
## [1] 0.4111628
## 
## $p.value
## [1] 0.7961702

Run the allometry on the wood-density ceiling and floor estimates (derived from the wood density variance)

#### SPLIT DATA ####
# separate by plot
plot_types <- unique(SDBH$Plot)
# list of data frames for each plot type
split_data <- lapply(plot_types, function(plot_type) {
  subset_DBH_subset <- subset(SDBH, Plot == plot_type)
  return(subset_DBH_subset)
})


#### RUN ALLOMETRY ####
# per-tree allometry: AGB in kg within each 20 m x 20 m plot area
D <- list() # initialize
matCiel <- matrix(NA, nrow = length(split_data), ncol = 2) # create mat to fill
for (i in 1:length(split_data)) { # loop through split data
plot <- split_data[[i]] # subset based on loop
plot$AGB <- NA # initialize AGB column
# allometry: AGB = p * exp(-1.499 + 2.148*ln(D) + 0.207*ln(D)^2 - 0.0281*ln(D)^3), where p = wood density (ceiling) and D = DBH
plot$AGB <- with(plot, WD_ceil * exp(-1.499 + 2.148 * log(DBH) + 0.207 * log(DBH)^2 - 0.0281 * log(DBH)^3)) # apply allometry
D[[i]] <- (plot) # get plot information
matCiel[i,1] <- (plot$Plot[1]) # name
matCiel[i,2] <- (sum(plot$AGB)) # sum of all individual trees
}
matCiel[, 2] <- as.numeric(matCiel[, 2]) # make sure numeric data

# split rows by habitat type (R = riverine, F = established, M = marantaceae)
R_mat <- matCiel[c(1, 2, 8, 9), ] 
F_mat <- matCiel[c(3, 4, 5, 6, 7), ]
M_mat <- matCiel[c(10, 11, 12, 13, 14), ]

# Convert character vectors to numeric just in case
R_mat <- as.numeric(R_mat)
## Warning: NAs introduced by coercion
M_mat <- as.numeric(M_mat)
## Warning: NAs introduced by coercion
F_mat <- as.numeric(F_mat)
## Warning: NAs introduced by coercion
# Combine values into a list
data_list <- list(na.omit(R_mat), na.omit(M_mat), na.omit(F_mat))

# Convert to Mg/ha
matCiel <- as.data.frame(matCiel)
matCiel[,2] <- as.numeric(as.character(matCiel[,2]))
matCiel$CMgHa <- matCiel[,2] * 0.001/ 0.04
matCiel$CMgHa
##  [1] 394.66337 375.92490 212.11268 723.09044 848.07714 971.90466 572.97147
##  [8] 659.65005 231.25517 123.14901 130.51382  70.47897 314.29234 291.45400
#### SPLIT DATA ####
# separate by plot
plot_types <- unique(SDBH$Plot)
# list of data frames for each plot type
split_data <- lapply(plot_types, function(plot_type) {
  subset_DBH_subset <- subset(SDBH, Plot == plot_type)
  return(subset_DBH_subset)
})


#### RUN ALLOMETRY ####
# per-tree allometry: AGB in kg within each 20 m x 20 m plot area
D <- list() # initialize
matFloor <- matrix(NA, nrow = length(split_data), ncol = 2) # create mat to fill
for (i in 1:length(split_data)) { # loop through split data
plot <- split_data[[i]] # subset based on loop
plot$AGB <- NA # initialize AGB column
# allometry: AGB = p * exp(-1.499 + 2.148*ln(D) + 0.207*ln(D)^2 - 0.0281*ln(D)^3), where p = wood density (floor) and D = DBH
plot$AGB <- with(plot, WD_floor * exp(-1.499 + 2.148 * log(DBH) + 0.207 * log(DBH)^2 - 0.0281 * log(DBH)^3)) # apply allometry
D[[i]] <- (plot) # get plot information
matFloor[i,1] <- (plot$Plot[1]) # name
matFloor[i,2] <- (sum(plot$AGB)) # sum of all individual trees
}
matFloor[, 2] <- as.numeric(matFloor[, 2]) # make sure numeric data

# split rows by habitat type (R = riverine, F = established, M = marantaceae)
R_mat <- matFloor[c(1, 2, 8, 9), ] 
F_mat <- matFloor[c(3, 4, 5, 6, 7), ]
M_mat <- matFloor[c(10, 11, 12, 13, 14), ]

# Convert character vectors to numeric just in case
R_mat <- as.numeric(R_mat)
## Warning: NAs introduced by coercion
M_mat <- as.numeric(M_mat)
## Warning: NAs introduced by coercion
F_mat <- as.numeric(F_mat)
## Warning: NAs introduced by coercion
# Combine values into a list
data_list <- list(na.omit(R_mat), na.omit(M_mat), na.omit(F_mat))

# Convert to Mg/ha
matFloor <- as.data.frame(matFloor)
matFloor[,2] <- as.numeric(as.character(matFloor[,2]))
matFloor$CMgHa <- matFloor[,2] * 0.001/ 0.04
matFloor$CMgHa
##  [1] 212.76470  98.74834 132.53670 575.98348 677.37304 661.94006 398.54196
##  [8] 396.08234 185.23175 100.19017  76.65098  59.66890 234.64534 188.06352
MAT <- cbind(mat$V1, matFloor$CMgHa, mat$CMgHa, matCiel$CMgHa) # plot name with floor, central, and ceiling estimates (Mg/ha)

—- REMOTE SENSING DATA ANALYSIS —-

Land classification accuracy and statistics calculated via ArcGIS Pro and Excel

Clip data

#### CHM ####
# Use mask function to clip CHM to the footprint
CHM_clipped <- mask(CHM, FOOTP)
plot(CHM_clipped) # view

#### LiDAR Land Classification ####
# mask
LidLC_clipped <- mask(LidLC, FOOTP) # clip the LiDAR land classification to the footprint
color_palette <- c('darkseagreen2', 'plum3', 'tan', 'midnightblue','chartreuse4', 'chocolate4', 'gray9')

par(mfrow=c(1, 2))  # side-by-side plot clipped and unclipped
plot(LidLC, col = color_palette, breaks = c(0, 1, 9, 10, 90, 100, 1000, 10000), main = "LidLC Raster", legend = TRUE)
plot(LidLC_clipped, col = color_palette, breaks = c(0, 1, 9, 10, 90, 100, 1000, 10000), main = "LidLC Raster Clip", legend = TRUE)

par(mfrow=c(1, 1))  # back to normal plot

Classify sections of the LiDAR CHM from analysis of the LiDAR land classification. !DO NOT RUN! This takes a long time to run; the outputs are already loaded in.

#### CLASSIFY POLYGONS FROM LiDAR LAND CLASS WHERE THERE IS 95% CLASSIFICATION CONFIDENCE ####
#k = 1 # initialize
#Pltz$Value <- NA # initialize new column
#Pltz_sf <- st_as_sf(Pltz, coords = c("X", "Y")) # spatial

#for (i in 1:4639) { # loop through randomly created polygons
    #extent_polygon <- st_bbox(Pltz_sf[i, "geometry"]) # get the poly extent
  #raster_cropped <- crop(LidLC_clipped, extent_polygon) # crop classification raster to selected  poly extent
  #values_in_extent <- raster::extract(raster_cropped, Pltz[i, ], buffer = 0.1) # get values from cropped raster
  #value_table <- table(unlist(values_in_extent)) # values to table
  #most_common_value <- names(which.max(value_table)) # most common value
  #proportion <- value_table[most_common_value] / sum(value_table) # how common?
  #is_95_percent_or_more <- proportion >= 0.95 # if the proportion is 95% or more
  #if (is_95_percent_or_more) { #if yes, is 95% or more
    #Pltz$Value[i] <- most_common_value # then save the most common value
  #} else { # otherwise
  #  Pltz$Value[i] <- NA # denote most common value as NA
  #}
#}
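The core of the 95% confidence filter above is a majority-vote proportion; a minimal sketch on a toy vector of extracted class values:

vals <- c(9, 9, 9, 9, 9, 9, 9, 9, 9, 100) # hypothetical pixel classes within one polygon
value_table <- table(vals)
most_common_value <- names(which.max(value_table))
proportion <- value_table[most_common_value] / sum(value_table) # 0.9 here
proportion >= 0.95 # FALSE: this polygon would be set to NA and excluded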



#### SUBSET POLYGONS OF 95% CLASSIFICATION CONFIDENCE ####
#subset_Pltz <- Pltz[complete.cases(Pltz$Value), ] # where Pltz$Value has value
#subset_Pltz$Value <- as.numeric(as.character(subset_Pltz$Value)) # convert to numeric
#value_counts <- table(subset_Pltz$Value) # occurrences of each land class type
#print(value_counts) # print to evaluate
# classification values of interest: 1, 9, and 100 (the forest types). 849 is the minimum polygon count among these classes...
# so randomly sample 849 polygons from each class (counts were 882, 880, and 849)

#set.seed(123) # randomly select polygons: for replicability
# randomly select 849 rows where subset_Pltz$Value equals 1 (mar)
#MarSub <- subset_Pltz[subset_Pltz$Value == 1, ][sample(nrow(subset_Pltz[subset_Pltz$Value == 1, ]), 849), #]
#plot(MarSub) # plot polygons
# randomly select 849 rows where subset_Pltz$Value equals 9 (den)
#DenSub <- subset_Pltz[subset_Pltz$Value == 9, ][sample(nrow(subset_Pltz[subset_Pltz$Value == 9, ]), 849), #]
#plot(DenSub) # plot polygons
# randomly select 849 rows where subset_Pltz$Value equals 100 (riv)
#RivSub <- subset_Pltz[subset_Pltz$Value == 100, ][sample(nrow(subset_Pltz[subset_Pltz$Value == 100, ]), 849), ]
#plot(RivSub) # plot polygons



#### CLIP CHM TO SELECTED POLYGONS ####
#This does not always work inside the chunk for some reason... if that happens, run it in the console
#temp_dir <- tempdir() # set temporary directory to reduce clutter and confusion
#options(wd = temp_dir) # set to wd

#CHM_clipped_mar_sub <- raster::mask(CHM_clipped, MarSub) # clip CHM_clipped to MarSub polygons
#plot(CHM_clipped_mar_sub, main = "CHM_clipped to MarSub Polygons") # plot

#CHM_clipped_riv_sub <- raster::mask(CHM_clipped, RivSub) # clip CHM_clipped to RivSub polygons
#plot(CHM_clipped_riv_sub, main = "CHM_clipped to RivSub Polygons") # plot

#CHM_clipped_den_sub <- raster::mask(CHM_clipped, DenSub) # clip CHM_clipped to DenSub polygons
#plot(CHM_clipped_den_sub, main = "CHM_clipped to DenSub Polygons") # plot



#### TURN CHM CLIPS TO NON-SPATIAL DATA ####
#result_list <- list() # initialize list
#for (i in 1:nrow(MarSub)) { # loop through MarSub polygons
#chm_values <- raster::extract(CHM_clipped, MarSub[i,], df = TRUE, cellnumbers = TRUE) # get chm values for the polygon selected
#coordinates <- raster::xyFromCell(CHM_clipped, chm_values$cell) # collect the coordinates
 #result_df <- data.frame( # combine CHM value and coordinates and add to a data frame
  #  Longitude = coordinates[, 1],
   # Latitude = coordinates[, 2],
    #CHM_Value = chm_values$CHMs) 
    # result_list <- append(result_list, list(result_df)) # add data frame to a list of results
# print(i) to check the loop
#}
#Mresult_final_df <- do.call(rbind, result_list) # expand out the list to a dataframe of results

#result_list <- list() # initialize list
#for (i in 1:nrow(RivSub)) { # loop through RivSub polygons
#chm_values <- raster::extract(CHM_clipped, RivSub[i,], df = TRUE, cellnumbers = TRUE) # get chm values for the polygon selected
#coordinates <- raster::xyFromCell(CHM_clipped, chm_values$cell) # collect the coordinates
 #result_df <- data.frame( # combine CHM value and coordinates and add to a data frame
#    Longitude = coordinates[, 1],
 #   Latitude = coordinates[, 2],
  #  CHM_Value = chm_values$CHMs)
   # result_list <- append(result_list, list(result_df)) # add data frame to a list of results
# print(i) # as a check
#}
#Rresult_final_df <- do.call(rbind, result_list) # expand out the list to a dataframe of results

#result_list <- list() # initialize list
#for (i in 1:nrow(DenSub)) { # loop through DenSub polygons
#chm_values <- raster::extract(CHM_clipped, DenSub[i,], df = TRUE, cellnumbers = TRUE) # get chm values #for the polygon selected
#coordinates <- raster::xyFromCell(CHM_clipped, chm_values$cell) # collect the coordinates
# result_df <- data.frame( # combine CHM value and coordinates and add to a data frame
 #   Longitude = coordinates[, 1],
  #  Latitude = coordinates[, 2],
   # CHM_Value = chm_values$CHMs)
    #result_list <- append(result_list, list(result_df)) # add data frame to a list of results
# print(i) # a check
#}
#Dresult_final_df <- do.call(rbind, result_list) # expand out the list to a dataframe of results



#### OUTPUT NON-SPATIAL DATA TO .CSV ####
#write.csv(Mresult_final_df, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/MRes.csv", row.names = FALSE) # save
#write.csv(Rresult_final_df, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/RRes.csv", row.names = FALSE) # save
#write.csv(Dresult_final_df, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/DRes.csv", row.names = FALSE) # save



#### OUTPUT SELECTED POLYGONS TO .GEOJSON ####
#st_write(RivSub, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/RivSub.geojson", driver = "GeoJSON") # save
#st_write(DenSub, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/DenSub.geojson", driver = "GeoJSON") # save
#st_write(MarSub, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/MarSub.geojson", driver = "GeoJSON") # save



#### OUTPUT CHM CLIPS AS .TIF FILES ####
#output_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/CHM_clipped_den_sub.tif" # path
#raster::writeRaster(CHM_clipped_den_sub, filename = output_path, format = "GTiff", overwrite = TRUE) # save
#output_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/CHM_clipped_riv_sub.tif" # path
#raster::writeRaster(CHM_clipped_riv_sub, filename = output_path, format = "GTiff", overwrite = TRUE) # save
#output_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/CHM_clipped_mar_sub.tif" # path 
#raster::writeRaster(CHM_clipped_mar_sub, filename = output_path, format = "GTiff", overwrite = TRUE) # save

Canopy height histograms

#### !DO NOT RUN! FROM SPATIAL DATA ####
# the same as the non-spatial data, but takes longer. !DO NOT RUN! this code
#hist_valuesM <- hist(values(CHM_MARs), main = "Histogram of CHM_MARs", xlab = "Pixel Values", col = "skyblue", border = "black") # Mar
#hist_valuesD <- hist(values(CHM_DENs), main = "Histogram of CHM_DENs", xlab = "Pixel Values", col = "skyblue", border = "black") # Den
#hist_valuesR <- hist(values(CHM_RIVs), main = "Histogram of CHM_RIVs", xlab = "Pixel Values", col = "skyblue", border = "black") # Riv




#### FROM NON-SPATIAL DATA ####
#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/CHMMvisHist.pdf"
#pdf(pdf_path)
hist(MRes$CHM_Value) # Mar

#dev.off()

#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/CHMRvisHist.pdf"
#pdf(pdf_path)
hist(RRes$CHM_Value) # Riv

#dev.off()

#pdf_path <- "/Users/beatriceyoud/Desktop/BHY_THESIS/FIGURE_DRAFTS/CHMDvisHist.pdf"
#pdf(pdf_path)
hist(DRes$CHM_Value) # Den

#dev.off()



#### K-S TEST ####
# M and R
ks_test_MRes_RRes <- ks.test(MRes$CHM_Value, RRes$CHM_Value)
## Warning in ks.test.default(MRes$CHM_Value, RRes$CHM_Value): p-value will be
## approximate in the presence of ties
print("KS Test between MRes and RRes:")
## [1] "KS Test between MRes and RRes:"
print(ks_test_MRes_RRes) 
## 
##  Asymptotic two-sample Kolmogorov-Smirnov test
## 
## data:  MRes$CHM_Value and RRes$CHM_Value
## D = 0.24914, p-value < 2.2e-16
## alternative hypothesis: two-sided
# M and D
ks_test_MRes_DRes <- ks.test(MRes$CHM_Value, DRes$CHM_Value)
## Warning in ks.test.default(MRes$CHM_Value, DRes$CHM_Value): p-value will be
## approximate in the presence of ties
print("KS Test between MRes and DRes:")
## [1] "KS Test between MRes and DRes:"
print(ks_test_MRes_DRes)
## 
##  Asymptotic two-sample Kolmogorov-Smirnov test
## 
## data:  MRes$CHM_Value and DRes$CHM_Value
## D = 0.31202, p-value < 2.2e-16
## alternative hypothesis: two-sided
# R and D
ks_test_RRes_DRes <- ks.test(RRes$CHM_Value, DRes$CHM_Value)
## Warning in ks.test.default(RRes$CHM_Value, DRes$CHM_Value): p-value will be
## approximate in the presence of ties
print("KS Test between RRes and DRes:")
## [1] "KS Test between RRes and DRes:"
print(ks_test_RRes_DRes)
## 
##  Asymptotic two-sample Kolmogorov-Smirnov test
## 
## data:  RRes$CHM_Value and DRes$CHM_Value
## D = 0.37502, p-value < 2.2e-16
## alternative hypothesis: two-sided
# all significantly different, but the sample size is very large, so check with a Benjamini-Hochberg false discovery rate correction
p_values <- c(ks_test_MRes_RRes$p.value, ks_test_MRes_DRes$p.value, ks_test_RRes_DRes$p.value) # collect p-values in a vector

# Benjamini-Hochberg correction
adjusted_p_values <- p.adjust(p_values, method = "BH")

# adjusted p-values (printed as 0 because the raw p-values are below machine precision)
print(adjusted_p_values)
## [1] 0 0 0
#### ANOVA ASSUMPTIONS ####
# randomly subsample: shapiro.test accepts at most 5000 values
set.seed(1) # Setting a seed for reproducibility
random_subsetM <- sample(MRes$CHM_Value, size = 5000, replace = FALSE)
shapiro_test_resultMAR <- shapiro.test(random_subsetM)
# display the result
print(shapiro_test_resultMAR)
## 
##  Shapiro-Wilk normality test
## 
## data:  random_subsetM
## W = 0.92461, p-value < 2.2e-16
# randomly subsample: shapiro.test accepts at most 5000 values
set.seed(1) # Setting a seed for reproducibility
random_subsetR <- sample(RRes$CHM_Value, size = 5000, replace = FALSE)
shapiro_test_resultRIV <- shapiro.test(random_subsetR)
# display the result
print(shapiro_test_resultRIV)
## 
##  Shapiro-Wilk normality test
## 
## data:  random_subsetR
## W = 0.99414, p-value = 2.404e-13
# randomly subsample: shapiro.test accepts at most 5000 values
set.seed(1) # Setting a seed for reproducibility
random_subsetD <- sample(DRes$CHM_Value, size = 5000, replace = FALSE)
shapiro_test_resultDEN <- shapiro.test(random_subsetD)
# display the result
print(shapiro_test_resultDEN)
## 
##  Shapiro-Wilk normality test
## 
## data:  random_subsetD
## W = 0.97962, p-value < 2.2e-16
random_subsetM <- data.frame(random_subsetM)
random_subsetM$Hab <- "Mar"
names(random_subsetM) <- c("RS","Hab")
random_subsetR <- data.frame(random_subsetR)
random_subsetR$Hab <- "Riv"
names(random_subsetR) <- c("RS","Hab")
random_subsetD <- data.frame(random_subsetD)
random_subsetD$Hab <- "Den"
names(random_subsetD) <- c("RS","Hab")
A_subs <- rbind(random_subsetD,random_subsetR,random_subsetM)
  
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(RS ~ Hab, data = A_subs)
# display the result
print(bartlett_test_result)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  RS by Hab
## Bartlett's K-squared = 1343.6, df = 2, p-value < 2.2e-16
#### KW Test ####
kw_result <- kruskal.test(RS ~ Hab, data = A_subs)
# display test results
print(kw_result)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  RS by Hab
## Kruskal-Wallis chi-squared = 1407.9, df = 2, p-value < 2.2e-16
# post hoc with Dunn's 
dunn_result <- dunn.test(A_subs$RS, g = A_subs$Hab, method = "bonferroni")
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 1407.9435, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |        Den        Mar
## ---------+----------------------
##      Mar |   34.32759
##          |    0.0000*
##          |
##      Riv |   30.31946  -4.116823
##          |    0.0000*    0.0001*
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
# results
print(dunn_result)
## $chi2
## [1] 1407.944
## 
## $Z
## [1] 34.327591 30.319469 -4.116824
## 
## $P
## [1] 1.521145e-258 3.174895e-202  1.920650e-05
## 
## $P.adjusted
## [1] 4.563434e-258 9.524686e-202  5.761950e-05
## 
## $comparisons
## [1] "Den - Mar" "Den - Riv" "Mar - Riv"
#### POLISHED HISTOGRAM ####
# Set up the layout for the three-panel plot
par(mfrow = c(3, 1))

# Define the x- and y-axis ranges
x_range <- c(0, 50)
y_range <- c(0, .06)
# Plot histogram for MRes$CHM_Value
hist(MRes$CHM_Value, col = 'plum3', main = "Mar", xlab = "CHM_Value", ylab = "Density", prob = TRUE, xlim = x_range, ylim = y_range)

# Plot histogram for RRes$CHM_Value
hist(RRes$CHM_Value, col = 'chartreuse4', main = "Riv", xlab = "CHM_Value", ylab = "Density", prob = TRUE, xlim = x_range, ylim = y_range)

# Plot histogram for DRes$CHM_Value
hist(DRes$CHM_Value, col = 'darkseagreen2', main = "Den", xlab = "CHM_Value", ylab = "Density", prob = TRUE, xlim = x_range, ylim = y_range)

Canopy gap

#### !DO NOT RUN! OVERALL CANOPY GAP ####
# This section takes very long to run -- !DO NOT RUN!: values already calculated
## mar
CHM_MARs_reclassified <- raster::reclassify(CHM_MARs, matrix(c(-Inf, 6, 0, 6, Inf, 1), ncol = 3, byrow = TRUE)) # reclassify values so that 6 m and above are canopy
#average_valueM <- cellStats(CHM_MARs_reclassified, stat = "mean") # calculate mean
#average_valueM # mean of the 0/1 raster = proportion of canopy (>= 6 m) pixels; the gap fraction is 1 minus this

## riv
CHM_RIVs_reclassified <- raster::reclassify(CHM_RIVs, matrix(c(-Inf, 6, 0, 6, Inf, 1), ncol = 3, byrow = TRUE)) # reclassify values so that 6 m and above are canopy
#average_valueR <- cellStats(CHM_RIVs_reclassified, stat = "mean") # calculate mean
#average_valueR # mean of the 0/1 raster = proportion of canopy (>= 6 m) pixels; the gap fraction is 1 minus this

## den
CHM_DENs_reclassified <- raster::reclassify(CHM_DENs, matrix(c(-Inf, 6, 0, 6, Inf, 1), ncol = 3, byrow = TRUE)) # reclassify values so that 6 m and above are canopy
#average_valueD <- cellStats(CHM_DENs_reclassified, stat = "mean") # calculate mean
#average_valueD # mean of the 0/1 raster = proportion of canopy (>= 6 m) pixels; the gap fraction is 1 minus this
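The reclassify-then-mean logic is easy to see on a toy raster (the heights below are made up):

toy_chm <- raster(matrix(c(2, 8, 10, 3, 5, 12, 9, 1, 7, 6.5, 2, 11, 4, 9, 3, 14), nrow = 4)) # toy CHM, heights in m
toy_canopy <- raster::reclassify(toy_chm, matrix(c(-Inf, 6, 0, 6, Inf, 1), ncol = 3, byrow = TRUE)) # 1 = canopy (>= 6 m)
cellStats(toy_canopy, stat = "mean") # proportion of canopy pixels; the gap fraction is 1 minus this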

#### 20 x 20m PLOT CANOPY GAP ####
n <- nrow(MarSub) + nrow(RivSub) + nrow(DenSub) # overall polygon numbers
gap_polygon_matrix <- matrix(nrow = n, ncol = 3) # initialize matrix
colnames(gap_polygon_matrix) <- c("gap", "polygon", "treatment") # new column names

for (i in 1:nrow(MarSub)) { # loop through mar polygons
CHM_MARs_clippedR <- crop(CHM_MARs_reclassified, extent(MarSub[i,])) # crop 1s and 0s chm to selected polygon
gap_polygon_matrix[i,1] <- cellStats(CHM_MARs_clippedR, stat = "mean") # calculate gap and add to data
gap_polygon_matrix[i,2] <- i # polygon number 
gap_polygon_matrix[i,3] <- 1 # forest type classification number
}

for (i in 1:nrow(RivSub)) { # loop through riv polygons
CHM_RIVs_clippedR <- crop(CHM_RIVs_reclassified, extent(RivSub[i,])) # crop 1s and 0s chm to selected polygon
gap_polygon_matrix[(i + nrow(MarSub)),1] <- cellStats(CHM_RIVs_clippedR, stat = "mean") # calculate gap and add to data
gap_polygon_matrix[(i + nrow(MarSub)),2] <- i  # polygon number 
gap_polygon_matrix[(i + nrow(MarSub)),3] <- 100 # forest type classification number
}

for (i in 1:nrow(DenSub)) { # loop through den polygons
CHM_DENs_clippedR <- crop(CHM_DENs_reclassified, extent(DenSub[i,])) # crop 1s and 0s chm to selected polygon
gap_polygon_matrix[(i + nrow(MarSub) + nrow(RivSub)),1] <- cellStats(CHM_DENs_clippedR, stat = "mean") # calculate gap and add to data
gap_polygon_matrix[(i + nrow(MarSub) + nrow(RivSub)),2] <- i  # polygon number 
gap_polygon_matrix[(i + nrow(MarSub) + nrow(RivSub)),3] <- 9 # forest type classification number
}



#### BOX PLOT ####
gap_polygon_df <- as.data.frame(gap_polygon_matrix) # create a data frame from the matrix
gap_polygon_df$gap <- as.numeric(gap_polygon_df$gap) # ensure gap is numeric



#### KRUSKAL WALLIS ####
kruskal_result <- kruskal.test(gap ~ treatment, data = gap_polygon_df) # run kw test
print(kruskal_result) # print results
## 
##  Kruskal-Wallis rank sum test
## 
## data:  gap by treatment
## Kruskal-Wallis chi-squared = 953.33, df = 2, p-value < 2.2e-16
posthoc_dunn <- dunn.test(gap_polygon_df$gap, gap_polygon_df$treatment, method = "bonferroni") # Dunn post hoc with bonferonni 
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 953.3166, df = 2, p-value = 0
## 
## 
##                            Comparison of x by group                            
##                                  (Bonferroni)                                  
## Col Mean-|
## Row Mean |          1        100
## ---------+----------------------
##      100 |  -30.38687
##          |    0.0000*
##          |
##        9 |  -19.93325   10.45361
##          |    0.0000*    0.0000*
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
print("Post-hoc Dunn's Test:")
## [1] "Post-hoc Dunn's Test:"
print(posthoc_dunn) # results
## $chi2
## [1] 953.3166
## 
## $Z
## [1] -30.38687 -19.93326  10.45362
## 
## $P
## [1] 4.094758e-203  1.047390e-88  7.052080e-26
## 
## $P.adjusted
## [1] 1.228427e-202  3.142169e-88  2.115624e-25
## 
## $comparisons
## [1] "1 - 100" "1 - 9"   "100 - 9"
# all pairwise comparisons are significant
# add letters to the box plot (see the sketch after the plot below)

# Define custom colors per treatment; boxplot draws boxes in sorted treatment
# order, so this maps 1 (mar) -> darkseagreen2, 9 (den) -> chartreuse4,
# and 100 (riv) -> plum3
custom_colors <- c('darkseagreen2', 'chartreuse4', 'plum3')

# Create a box plot of gap by treatment with custom colors
boxplot(gap ~ treatment, data = gap_polygon_df,
        main = "Box Plot of Gap by Treatment",
        ylab = "Gap",
        col = custom_colors,
        border = "black")
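
# A sketch for the "add letters" to-do above: derive a compact letter display
# from the Dunn adjusted p-values with multcompView (already loaded), then draw
# the letters over the boxes. The names like "1-100" are built from the
# comparisons returned by dunn.test.
pvals <- posthoc_dunn$P.adjusted
names(pvals) <- gsub(" ", "", posthoc_dunn$comparisons) # e.g. "1-100"
cld <- multcompView::multcompLetters(pvals, threshold = 0.05)$Letters
text(x = 1:3, y = max(gap_polygon_df$gap, na.rm = TRUE),
     labels = cld[c("1", "9", "100")]) # boxes plot in sorted order 1, 9, 100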

#### ANOVA ASSUMPTIONS ####
shapiro_test_resultMAR <- shapiro.test(gap_polygon_df$gap[gap_polygon_df$treatment==1])
# display the result
print(shapiro_test_resultMAR)
## 
##  Shapiro-Wilk normality test
## 
## data:  gap_polygon_df$gap[gap_polygon_df$treatment == 1]
## W = 0.93913, p-value < 2.2e-16
shapiro_test_resultDEN <- shapiro.test(gap_polygon_df$gap[gap_polygon_df$treatment==9]) # treatment 9 = den
# display the result
print(shapiro_test_resultDEN)
## 
##  Shapiro-Wilk normality test
## 
## data:  gap_polygon_df$gap[gap_polygon_df$treatment == 9]
## W = 0.79899, p-value < 2.2e-16
shapiro_test_resultRIV <- shapiro.test(gap_polygon_df$gap[gap_polygon_df$treatment==100]) # treatment 100 = riv
# display the result
print(shapiro_test_resultRIV)
## 
##  Shapiro-Wilk normality test
## 
## data:  gap_polygon_df$gap[gap_polygon_df$treatment == 100]
## W = 0.63623, p-value < 2.2e-16
# perform Bartlett test for homogeneity of variances
bartlett_test_result <- bartlett.test(gap ~ treatment, data = gap_polygon_df)
# display the result
print(bartlett_test_result)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  gap by treatment
## Bartlett's K-squared = 1069.3, df = 2, p-value < 2.2e-16
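# Normality is rejected for all three forest types and variances are
# heterogeneous, so ANOVA assumptions fail -- this supports using the
# non-parametric Kruskal-Wallis test above instead of ANOVA.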

Canopy height statistics: mean, median, max, min, and SD

#### RECLASSIFY ####
# Reclassify for canopy height -- set values at or below 6 m to NA so only trees remain
chm_mars_reclassified <- CHM_MARs # copy to reclassify
chm_mars_reclassified[chm_mars_reclassified <= 6] <- NA # reclassify
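
# A sketch of the same summaries taken straight from the masked raster instead
# of the point data frames used below (cellStats and quantile are from the
# raster package loaded above):
cellStats(chm_mars_reclassified, stat = "mean") # mean canopy height
cellStats(chm_mars_reclassified, stat = "sd") # SD of canopy height
raster::quantile(chm_mars_reclassified, probs = 0.5, na.rm = TRUE) # median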

#### STATISTICS THAT ARE NOT SD ####
## mar
MReCl <- MRes[MRes$CHM_Value > 6, ]
summary(MReCl)
##    Longitude         Latitude        CHM_Value     
##  Min.   :518665   Min.   : 66528   Min.   : 6.001  
##  1st Qu.:527770   1st Qu.: 68328   1st Qu.:17.000  
##  Median :528407   Median : 84647   Median :21.399  
##  Mean   :529427   Mean   : 80442   Mean   :21.130  
##  3rd Qu.:533234   3rd Qu.: 85211   3rd Qu.:25.439  
##  Max.   :534650   Max.   :100181   Max.   :54.591  
##  NA's   :9233     NA's   :9233     NA's   :9233
# median: 21.399, mean: 21.130, max: 54.591, min: (all mins will be around 6 because of canopy cutoff)

## riv
RReCl <- RRes[RRes$CHM_Value > 6, ]
summary(RReCl)
##    Longitude         Latitude        CHM_Value     
##  Min.   :518860   Min.   : 66663   Min.   : 6.001  
##  1st Qu.:528064   1st Qu.: 67399   1st Qu.:13.018  
##  Median :533025   Median : 67710   Median :17.851  
##  Mean   :530109   Mean   : 76511   Mean   :17.960  
##  3rd Qu.:533775   3rd Qu.: 84615   3rd Qu.:22.062  
##  Max.   :534650   Max.   :101266   Max.   :47.050  
##  NA's   :3902     NA's   :3902     NA's   :3902
# median: 17.851 , mean: 17.960 , max: 47.050, min: (all mins will be around 6 because of canopy cutoff)

## den
DReCl <- DRes[DRes$CHM_Value > 6, ]
summary(DReCl)
##    Longitude         Latitude        CHM_Value     
##  Min.   :518522   Min.   : 66494   Min.   : 6.001  
##  1st Qu.:519106   1st Qu.: 99751   1st Qu.:18.471  
##  Median :519651   Median :100288   Median :26.036  
##  Mean   :521493   Mean   : 95803   Mean   :25.735  
##  3rd Qu.:520183   3rd Qu.:100714   3rd Qu.:32.777  
##  Max.   :534503   Max.   :101250   Max.   :58.586  
##  NA's   :8601     NA's   :8601     NA's   :8601
# median: 26.036, mean: 25.735, max: 58.586, min: (all mins will be around 6 because of canopy cutoff)



#### SD ####
# note: SD below is computed on all CHM values after removing NAs, not only the > 6 m canopy subset used above
## mar
missing_values <- sum(is.na(MRes$CHM_Value)) # deal w missing values
if (missing_values > 0) { # if missing
  MRes <- na.omit(MRes) # omit
}
sd(MRes$CHM_Value) # calculate SD
## [1] 9.949817
#9.949817

## riv
missing_values <- sum(is.na(RRes$CHM_Value)) # deal w missing values
if (missing_values > 0) { # if missing
  RRes <- na.omit(RRes) # omit 
}
sd(RRes$CHM_Value) # calculate SD
## [1] 6.796924
#6.796924


## den
missing_values <- sum(is.na(DRes$CHM_Value)) # deal w missing values
if (missing_values > 0) { # if missing
  DRes <- na.omit(DRes) # omit 
}
sd(DRes$CHM_Value) # calculate SD
## [1] 11.55631
#11.55631
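
# A sketch collecting the per-type summaries into one dplyr table. Note this
# computes SD on the > 6 m canopy subsets (MReCl, RReCl, DReCl), whereas the SD
# chunk above uses all CHM values, so the SD column here will differ.
canopy_stats <- bind_rows(
  mutate(MReCl, type = "mar"),
  mutate(RReCl, type = "riv"),
  mutate(DReCl, type = "den")
) %>%
  group_by(type) %>%
  summarise(mean = mean(CHM_Value, na.rm = TRUE),
            median = median(CHM_Value, na.rm = TRUE),
            max = max(CHM_Value, na.rm = TRUE),
            sd = sd(CHM_Value, na.rm = TRUE))
canopy_stats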

—- LANDSCAPE PATTERNS RELATED TO AGB/AGC —- Organize plot-wise C information !LONG RUNTIME!

#### CARBON AND TEST PLOTS W NDVI ####
# NDVI was not included in the thesis, but it is still interesting to look at, so the code stays. The merged_data product is also used in subsequent code. WARNING: this code takes very long to run.
ndv_mat <- matrix(NA, nrow = 14, ncol = 2) # initialize

for (i in 1:14) { # loop through obs polygons
  bufpts_first <- bufpts[i, ] # subset bufpts for polygon i
  bufpts_raster_first <- rasterize(bufpts_first, ndv) # convert subset to raster for masking
  ndvi_masked_first <- mask(ndv, bufpts_raster_first) # mask NDVI using buffered points subset
  average_ndvi <- cellStats(ndvi_masked_first, stat = "mean", na.rm = TRUE) # calc avg NDVI
  ndv_mat[i,1] <- average_ndvi # store in the matrix initialized above
  ndv_mat[i,2] <- bufpts$plot[i] # add plot ID from bufpts
}
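
# A sketch of a faster equivalent (assuming bufpts is an sp/sf polygon layer of
# buffered plots, as above): raster::extract averages ndv inside each plot
# directly, skipping the rasterize()/mask() steps.
average_ndvi_all <- raster::extract(ndv, bufpts, fun = mean, na.rm = TRUE)
ndv_mat_alt <- cbind(as.numeric(average_ndvi_all), bufpts$plot) # same layout as ndv_mat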

# order check so that all plots line up
mat <- mat[order(mat[, 1]), ]
ndv_mat <- ndv_mat[order(ndv_mat[, 2]), ]
# Also, floor and ceiling
MAT <- MAT[order(MAT[, 1]), ]

# merge together: uncommented includes floor and ceiling
#merged_data <- cbind(ndv_mat, mat[, -1])
merged_data <- cbind(ndv_mat, MAT[,2:4])

# Assign column names
colnames(merged_data) <- c("NDVI","PLOT","CMgHaFloor","CMgHa","CMgHaCeil")

Carbon and canopy gap relationship

# This chunk also looks at Landsat band 4. While not in the thesis, the relationship is interesting. The products of this code are used in the rest of the thesis code.

marpnts <- bufpts[6:10,] # subset mar points
CHM_clipped_Reclass <- raster::reclassify(CHM_clipped, matrix(c(-Inf, 6, 0, 6, Inf, 1), ncol = 3, byrow = TRUE)) # reclassify values so that 6 m and above are canopy
gm <- matrix(nrow = 14, ncol = 4) # initialize (only rows 1-5 are filled, one per mar plot)
# clip the reclassified CHM to the mar plot extents
for (i in 1:nrow(marpnts)) { # loop through mar plots
  CHM_MARs_clippedR <- crop(CHM_clipped_Reclass, extent(marpnts[i,])) # crop 1s-and-0s CHM to selected plot
  B4_MARs_clippedR <- crop(B4, extent(marpnts[i,])) # crop Landsat band 4 to selected plot

  gm[i,1] <- cellStats(CHM_MARs_clippedR, stat = "mean") # mean of the 0/1 CHM, stored as gap
  gm[i,2] <- i # plot number
  gm[i,3] <- 1 # forest type classification number (mar)
  gm[i,4] <- cellStats(B4_MARs_clippedR, stat = "mean") # band 4 average
}

MARmerged_data <- data.frame(merged_data[6:10,]) # subset from merged_data for mar info
MARmerged_data$GAP <- gm[1:5,1] # get gap info
MARmerged_data$B4 <- gm[1:5,4] # band 4 
#colnames(MARmerged_data) <- c("NDVI", "PLOT", "CMgHaFloor", "CMgHa", "CMgHaCeil", "GAP", "B4") # change naming options
MARmerged_data$NDVI <- as.numeric(MARmerged_data$NDVI) # make sure numeric



#### GAP - C MODEL ####
# linear regression model: gap explains the carbon variance quite well, with an R^2 of 0.88 (see summary below)
modelMar <- lm(CMgHa ~ GAP, data = MARmerged_data)
# summary of the model
summary(modelMar)
## 
## Call:
## lm(formula = CMgHa ~ GAP, data = MARmerged_data)
## 
## Residuals:
##       1       2       3       4       5 
## -11.997  38.557  14.284 -48.022   7.179 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -106.96      59.96  -1.784   0.1724  
## GAP           368.07      79.71   4.618   0.0191 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37.38 on 3 degrees of freedom
## Multiple R-squared:  0.8767, Adjusted R-squared:  0.8355 
## F-statistic: 21.32 on 1 and 3 DF,  p-value: 0.01911
#RMSE
# Residuals (copied, rounded, from the model summary above)
residuals <- c(-11.997, 38.557, 14.284, -48.022, 7.179)
# Mean Squared Residuals
mse <- mean(residuals^2)
# RMSE
rmse <- sqrt(mse)
# Print RMSE
print(rmse)
## [1] 28.95601
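# An equivalent RMSE straight from the fitted model object, avoiding the
# hand-copied residuals above (the same one-liner works for modelMarFLOOR and
# modelMarCEIL below):
sqrt(mean(residuals(modelMar)^2))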
#### GAP - C MODEL ####
# linear regression model: gap explains the carbon variance quite well, with an R^2 of 0.81 (see summary below)
modelMarFLOOR <- lm(CMgHaFloor ~ GAP, data = MARmerged_data)
# summary of the model
summary(modelMarFLOOR)
## 
## Call:
## lm(formula = CMgHaFloor ~ GAP, data = MARmerged_data)
## 
## Residuals:
##       1       2       3       4       5 
##  -7.412  36.241  22.660 -49.411  -2.077 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   -78.55      60.92  -1.290   0.2876  
## GAP           291.27      80.99   3.596   0.0369 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 37.98 on 3 degrees of freedom
## Multiple R-squared:  0.8117, Adjusted R-squared:  0.749 
## F-statistic: 12.93 on 1 and 3 DF,  p-value: 0.03685
#RMSE
# Residuals (copied, rounded, from the model summary above)
residuals <- c(-7.412, 36.241, 22.660, -49.411, -2.077)
# Mean Squared Residuals
mse <- mean(residuals^2)
# RMSE
rmse <- sqrt(mse)
# Print RMSE
print(rmse)
## [1] 29.41967
#### GAP - C MODEL ####
# linear regression model: gap explains the carbon variance quite well, with an R^2 of 0.91 (see summary below)
modelMarCEIL <- lm(CMgHaCeil ~ GAP, data = MARmerged_data)
# summary of the model
summary(modelMarCEIL)
## 
## Call:
## lm(formula = CMgHaCeil ~ GAP, data = MARmerged_data)
## 
## Residuals:
##       1       2       3       4       5 
## -16.581  40.872   5.908 -46.633  16.435 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -135.38      61.60  -2.198   0.1154  
## GAP           444.87      81.89   5.432   0.0122 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.41 on 3 degrees of freedom
## Multiple R-squared:  0.9077, Adjusted R-squared:  0.877 
## F-statistic: 29.51 on 1 and 3 DF,  p-value: 0.01224
#RMSE
# Residuals (copied, rounded, from the model summary above)
residuals <- c(-16.581, 40.872, 5.908, -46.633, 16.435)
# Mean Squared Residuals
mse <- mean(residuals^2)
# RMSE
rmse <- sqrt(mse)
# Print RMSE
print(rmse)
## [1] 29.74929

Calculate C predictions on the canopy gap raster

all_values <- raster::getValues(TENmresGap)
#non_na_values <- na.omit(all_values)
#view(non_na_values) # for visualization without NA values
new_data <- data.frame(GAP = all_values) # dataframe

# Use the predict function to get predictions from the created model
predictions <- predict(modelMar, newdata = new_data)

predicted_raster <- raster(TENmresGap) # transform to a raster
values(predicted_raster) <- predictions # predictions to the raster

#writeRaster(predicted_raster, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/predicted_raster.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif



#### floor ####
all_values <- raster::getValues(TENmresGap)
#non_na_values <- na.omit(all_values)
#view(non_na_values) # for visualization without NA values
new_dataFLOOR <- data.frame(GAP = all_values) # dataframe

# Use the predict function to get predictions from the created model
predictionsFLOOR <- predict(modelMarFLOOR, newdata = new_dataFLOOR)

predicted_rasterFLOOR <- raster(TENmresGap) # transform to a raster
values(predicted_rasterFLOOR) <- predictionsFLOOR # predictions to the raster

#writeRaster(predicted_rasterFLOOR, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/predicted_rasterFLOOR.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif

#### ceil####
all_values <- raster::getValues(TENmresGap)
#non_na_values <- na.omit(all_values)
#view(non_na_values) # for visualization without NA values
new_dataCEIL <- data.frame(GAP = all_values) # dataframe

# Use the predict function to get predictions from the created model
predictionsCEIL <- predict(modelMarCEIL, newdata = new_dataCEIL)

predicted_rasterCEIL <- raster(TENmresGap) # transform to a raster
values(predicted_rasterCEIL) <- predictionsCEIL # predictions to the raster

#writeRaster(predicted_rasterCEIL, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/predicted_rasterCEIL.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif
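
# A sketch of a one-step alternative with raster::predict, which applies the
# model cell by cell; the layer name must match the model's predictor ("GAP").
gap_layer <- TENmresGap
names(gap_layer) <- "GAP" # rename so predict() finds the GAP term
predicted_raster_alt <- raster::predict(gap_layer, modelMar)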

Corrected C for only Mar

# In QGIS, I separated out the C predictions so that they cover only mar pixels
reclassified_C_onlyMAR <- C_onlyMAR # new output to be reclassified
reclassified_C_onlyMAR[reclassified_C_onlyMAR < 0] <- 0 # reclassify now so that any predictions that are negative are corrected to 0
plot(reclassified_C_onlyMAR) # view

#writeRaster(reclassified_C_onlyMAR, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/reclassified_C_onlyMAR.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif

all_values <- raster::getValues(reclassified_C_onlyMAR) # extract values
A <- na.omit(all_values) # get values without NAs

#### Ceil and floor ####
reclassified_C_onlyMARF <- C_onlyMARF # new output to be reclassified
reclassified_C_onlyMARF[reclassified_C_onlyMARF < 0] <- 0 # reclassify now so that any predictions that are negative are corrected to 0
plot(reclassified_C_onlyMARF) # view

#writeRaster(reclassified_C_onlyMARF, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/reclassified_C_onlyMARF.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif

all_values <- raster::getValues(reclassified_C_onlyMARF) # extract values
FL <- na.omit(all_values) # get values without NAs

reclassified_C_onlyMARC <- C_onlyMARC # new output to be reclassified
reclassified_C_onlyMARC[reclassified_C_onlyMARC < 0] <- 0 # reclassify now so that any predictions that are negative are corrected to 0
plot(reclassified_C_onlyMARC) # view

#writeRaster(reclassified_C_onlyMARC, "/Users/beatriceyoud/Desktop/BHY_THESIS/DATA/reclassified_C_onlyMARC.tif", format = "GTiff", overwrite = TRUE) # write and save as .tif

all_values <- raster::getValues(reclassified_C_onlyMARC) # extract values
CL <- na.omit(all_values) # get values without NAs

AGB for the Mar in the CHM

AA <- data.frame(A)
AA$upper <- AA$A + 37.38 # +/- residual standard error of modelMar
AA$lower <- AA$A - 37.38

SumA <- sum(AA$A)*.01 # each 10 m pixel is 100 m^2 = 0.01 ha, so per-ha values scale to totals
SumAupper <- sum(AA$upper)*.01
SumAlower <- sum(AA$lower)*.01

CLL <- data.frame(CL)
CLL$upper <- CLL$CL + 38.41 # +/- residual standard error of modelMarCEIL
CLL$lower <- CLL$CL - 38.41

SumCL <- sum(CLL$CL)*.01 # 0.01 ha per pixel
SumCLupper <- sum(CLL$upper)*.01
SumCLlower <- sum(CLL$lower)*.01

FLL <- data.frame(FL)
FLL$upper <- FLL$FL + 37.98 # +/- residual standard error of modelMarFLOOR
FLL$lower <- FLL$FL - 37.98

SumFL <- sum(FLL$FL)*.01 # 0.01 ha per pixel
SumFLupper <- sum(FLL$upper)*.01
SumFLlower <- sum(FLL$lower)*.01
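
# A sketch of the same totals read straight off the rasters: cellStats sums the
# per-ha predictions, and each 10 m pixel is 0.01 ha (NAs are removed by default).
cellStats(reclassified_C_onlyMAR, stat = "sum") * 0.01 # should match SumA
cellStats(reclassified_C_onlyMARF, stat = "sum") * 0.01 # floor, matches SumFL
cellStats(reclassified_C_onlyMARC, stat = "sum") * 0.01 # ceiling, matches SumCL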

View C for mar in park and expand

# look at the C data distributions
hist(A)

hist(CL)

hist(FL)

# get average predicted stock (Mg/ha) and convert to AGC with a 0.47 carbon fraction (i.e., treating the predictions as AGB)
MMar <- mean(A) # 170.258 Mg/ha
MMarAGC <- MMar*0.47 # 80.02126 MgC/ha
# ceiling
MMarCL <- mean(CL) # 207.8015 Mg/ha
MMarAGCCL <- MMarCL*0.47 # 97.6667 MgC/ha
# floor
MMarFL <- mean(FL) # 145.6434 Mg/ha
MMarAGCFL <- MMarFL*0.47 # 68.45238 MgC/ha


#GCPLC_Vals <- raster::getValues(GCPLC) # ground points LC, examine
#frequency_counts <- table(GCPLC_Vals) # table function to get frequency counts
# GCPLC_Vals frequency counts (each pixel is a 10 m x 10 m area):
#        1          9        100
# 48505861   51669484   20629705
# original estimate for the Odzala forest:
forested <- 20629705 + 51669484 + 48505861 # in 100 m^2 pixels
marforested <- 48505861 # class 1
rivforested <- 20629705 # class 100
denforested <- 51669484 # class 9

# convert to ha
HAforested <- forested*(.01) # in ha: Odzala is around 1.35 million ha; most of the rest is savanna
HAmarforested <- marforested*(0.01)
HArivforested <- rivforested*(0.01)
HAdenforested <- denforested*(0.01)

# percentage of the forested park by type
pmar <- HAmarforested/HAforested # around 40%
priv <- HArivforested/HAforested # around 17%
pden <- HAdenforested/HAforested # around 42%