UTK cores

packages

# Show chunk source code in the knitted output.
knitr::opts_chunk$set(echo = TRUE)

# Run the project's package script.
# NOTE(review): presumably this attaches dplyr/ggplot2 and defines theme_er1(),
# all of which are used unqualified below -- confirm against code/0-packages.R.
source("code/0-packages.R")

data

# Raw inputs, both read with read.csv() defaults from the raw/ folder.
# Soil inventory table (bulk density file).
bd_grav_data <- read.csv(file = "raw/PhIr2021_Soil_Inventory_bd.csv")

# Air-dried soil sample sheet.
samples_utk <- read.csv(file = "raw/UG_PhIr_2021_airdrysoilsamples.csv")

Clean the data: recode Area so that East becomes acidic tundra and West becomes non-acidic tundra. Whenever the Horizon column contains an “O”, the soil_material column is set to organic; likewise, an “M” is set to mineral. Calculate volumetric water content (VWC) as bulk density × gravimetric water content.

# Tidy the inventory table in a single mutate():
#   * Area: "East" -> "acidic tundra", "West" -> "non-acidic tundra"
#     (any other value is left untouched by recode()).
#   * soil_material: "organic" when Horizon contains "O", otherwise "mineral"
#     when it contains "M", otherwise NA.
#   * volumetric water content = bulk density * gravimetric water content.
# Dropping the `X` column (dplyr::select(-c(X))) stays disabled, as before.
bd_grav_cleaned <- dplyr::mutate(
  bd_grav_data,
  Area = recode(
    Area,
    "East" = "acidic tundra",
    "West" = "non-acidic tundra"
  ),
  soil_material = case_when(
    grepl("O", Horizon) ~ "organic",
    grepl("M", Horizon) ~ "mineral"
  ),
  volumetric_water_content_cm3_cm3 =
    soil_bulk_density_g_cm3 * grav_water_gh20_per_gdrysoil
)


# Every row of the UTK sheet is tagged "yes" so that, after a join, rows
# without a match can be recognised by an NA in this column.
samples_utk_forcombining <- mutate(
  samples_utk,
  driedsample_readyforshipping = "yes"
)

Reduce the data to the columns needed, and create ordered factor levels for the Horizon data (stored in the new label column).

# Keep identifiers, depths and the derived soil properties, then add `label`:
# a factor copy of Horizon with levels ordered O, O1, O2, O3, M, M1, M2.
bd_select <- bd_grav_cleaned %>%
  dplyr::select(
    Sample_ID, Core_ID, Date_collected, Area, Site, Plot_num, Plot_ID,
    Horizon,
    Depth_1_cm, Depth_2_cm, Depth_3_cm, Depth_4_cm,
    Average_Depth_cm, real_depth_cm,
    soil_bulk_density_g_cm3, volumetric_water_content_cm3_cm3,
    soil_material
  ) %>%
  mutate(
    label = factor(
      Horizon,
      levels = c("O", "O1", "O2", "O3", "M", "M1", "M2")
    )
  )

Differentiate samples that have been analyzed for VWC/bulk density from those that haven’t. Replacing all NAs with “not analyzed”, while accurate, creates an issue for the depth column: it switches the data type from numeric to character. That is fixed later, when plotting, by converting the column back to numeric.

# Overwrite every NA cell with the string "not analyzed" (columns that
# contained NAs become character), then flag each row: "not analyzed" when
# its bulk density cell holds that string, "analyzed" otherwise.
sample_status <- bd_select
sample_status[is.na(sample_status)] <- "not analyzed"
sample_status <- mutate(
  sample_status,
  data = if_else(
    grepl("not analyzed", soil_bulk_density_g_cm3),
    "not analyzed",
    "analyzed"
  )
)

# Flag which inventory samples have a dried subsample at UTK.
# `Sample_ID` is renamed so it matches the `sampleID` key of the UTK sheet;
# inventory rows without a match get NA in `driedsample_readyforshipping`.
#
# The UTK sheet is reduced to one row per `sampleID` before joining. Without
# this, a repeated `sampleID` in the sheet duplicates the matching inventory
# row: the previously rendered tallies show 202 joined rows against 201
# inventory rows, which inflated the "yes" counts by one.
# NOTE(review): distinct() keeps the first row per sampleID -- confirm the
# repeated entry is a true duplicate rather than two separate subsamples.
#
# `by` is spelled out so the join key cannot silently change if the two
# tables ever share another column name.
sample_status_readytogosamples <-
  bd_select %>%
  rename(sampleID = Sample_ID) %>%
  left_join(
    distinct(samples_utk_forcombining, sampleID, .keep_all = TRUE),
    by = "sampleID"
  )

Analyzed vs Not Analyzed Sample Distributions

# Depth of every analyzed sample against collection date, one panel per
# Site (rows) x Area (columns). Rows whose depth was overwritten with
# "not analyzed" are dropped before the depth is converted back to numeric.
sample_status %>%
  filter(data == "analyzed", real_depth_cm != "not analyzed") %>%
  mutate(
    Site = factor(Site, levels = c("Dry", "Mesic", "Hydric", "Transect")),
    Area = factor(Area, levels = c("non-acidic tundra", "acidic tundra")),
    real_depth_cm = as.numeric(real_depth_cm),
    # Explicit levels put the dates in calendar order on the x axis.
    Date_collected = factor(
      Date_collected,
      levels = c(
        "6-Jul-21", "7-Jul-21", "13-Jul-21", "14-Jul-21",
        "24-Jul-21", "30-Jul-21", "31-Jul-21", "7-Aug-21"
      )
    )
  ) %>%
  ggplot(aes(x = Date_collected, y = real_depth_cm, fill = data)) +
  geom_point(shape = 21, size = 3, alpha = 0.5) +
  scale_fill_manual(values = "#efc3e6") +
  labs(
    x = "date collected",
    y = "depth",
    fill = "bulk density & volumetric water"
  ) +
  facet_grid(Site ~ Area, scales = "free") +
  scale_y_reverse() +
  theme_er1() +
  theme(
    axis.text.x = element_text(vjust = 0.5, hjust = 1, angle = 90, size = 9),
    legend.position = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )

# Depth against collection date for samples flagged "yes" (dried sample
# present in the UTK sheet), one panel per Site (rows) x Area (columns).
sample_status_readytogosamples %>%
  filter(driedsample_readyforshipping == "yes") %>%
  mutate(
    Site = factor(Site, levels = c("Dry", "Mesic", "Hydric", "Transect")),
    Area = factor(Area, levels = c("non-acidic tundra", "acidic tundra")),
    real_depth_cm = as.numeric(real_depth_cm),
    # Explicit levels put the dates in calendar order on the x axis.
    Date_collected = factor(
      Date_collected,
      levels = c(
        "6-Jul-21", "7-Jul-21", "13-Jul-21", "14-Jul-21",
        "24-Jul-21", "30-Jul-21", "31-Jul-21", "7-Aug-21"
      )
    )
  ) %>%
  ggplot(
    aes(
      x = Date_collected,
      y = real_depth_cm,
      fill = driedsample_readyforshipping
    )
  ) +
  geom_point(shape = 21, size = 3, alpha = 0.5) +
  scale_fill_manual(values = "#f4acb7") +
  labs(
    x = "date collected",
    y = "depth",
    fill = "dried samples at UTK"
  ) +
  facet_grid(Site ~ Area, scales = "free") +
  scale_y_reverse() +
  theme_er1() +
  theme(
    axis.text.x = element_text(vjust = 0.5, hjust = 1, angle = 90, size = 9),
    legend.position = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
## Warning: Removed 1 rows containing missing values (geom_point).

# Depth of every not-yet-analyzed sample against collection date, one panel
# per Site (rows) x Area (columns). Rows whose depth was overwritten with
# "not analyzed" are dropped before the depth is converted back to numeric.
sample_status %>%
  filter(data == "not analyzed", real_depth_cm != "not analyzed") %>%
  mutate(
    Site = factor(Site, levels = c("Dry", "Mesic", "Hydric", "Transect")),
    Area = factor(Area, levels = c("non-acidic tundra", "acidic tundra")),
    real_depth_cm = as.numeric(real_depth_cm),
    # Explicit levels put the dates in calendar order on the x axis.
    Date_collected = factor(
      Date_collected,
      levels = c(
        "6-Jul-21", "7-Jul-21", "13-Jul-21", "14-Jul-21",
        "24-Jul-21", "30-Jul-21", "31-Jul-21", "7-Aug-21"
      )
    )
  ) %>%
  ggplot(aes(x = Date_collected, y = real_depth_cm, fill = data)) +
  geom_point(shape = 21, size = 3, alpha = 0.5) +
  scale_fill_manual(values = "#b8bedd") +
  labs(
    x = "date collected",
    y = "depth",
    fill = "bulk density & volumetric water"
  ) +
  facet_grid(Site ~ Area, scales = "free") +
  scale_y_reverse() +
  theme_er1() +
  theme(
    axis.text.x = element_text(vjust = 0.5, hjust = 1, angle = 90, size = 9),
    legend.position = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )

# Depth against collection date for samples with no match in the UTK sheet
# (driedsample_readyforshipping is NA), one panel per Site x Area.
sample_status_readytogosamples %>%
  # is.na() states the intent directly; `%in% NA` selected the same rows.
  filter(is.na(driedsample_readyforshipping)) %>%
  mutate(
    Site = factor(Site, levels = c("Dry", "Mesic", "Hydric", "Transect")),
    Area = factor(Area, levels = c("non-acidic tundra", "acidic tundra")),
    real_depth_cm = as.numeric(real_depth_cm),
    # Explicit levels put the dates in calendar order on the x axis.
    Date_collected = factor(
      Date_collected,
      levels = c(
        "6-Jul-21", "7-Jul-21", "13-Jul-21", "14-Jul-21",
        "24-Jul-21", "30-Jul-21", "31-Jul-21", "7-Aug-21"
      )
    )
  ) %>%
  ggplot() +
  geom_point(
    aes(
      x = Date_collected,
      y = real_depth_cm,
      fill = driedsample_readyforshipping
    ),
    shape = 21, size = 3, alpha = 0.5
  ) +
  # Every fill value here is NA, and a manual scale draws NA with `na.value`
  # rather than with `values`. Setting na.value is what actually applies the
  # intended light grey; previously the default NA colour was used instead.
  scale_fill_manual(values = c("#d3d3d3"), na.value = "#d3d3d3") +
  labs(
    x = "date collected",
    y = "depth",
    fill = "dried samples at UTK"
  ) +
  facet_grid(Site ~ Area, scales = "free") +
  scale_y_reverse() +
  theme_er1() +
  theme(
    axis.text.x = element_text(vjust = 0.5, hjust = 1, angle = 90, size = 9),
    legend.position = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
## Warning: Removed 2 rows containing missing values (geom_point).

Sample counts

Analyzed vs not analyzed

# Number of samples per analysis status.
ncount <- dplyr::summarise(
  group_by(sample_status, data),
  n = n()
)

print(ncount)
## # A tibble: 2 x 2
##   data             n
##   <chr>        <int>
## 1 analyzed        86
## 2 not analyzed   115
# Number of samples with ("yes") and without (NA) a dried sample at UTK.
ncount_driedsamplesatUTK <- dplyr::summarise(
  group_by(sample_status_readytogosamples, driedsample_readyforshipping),
  n = n()
)

print(ncount_driedsamplesatUTK)
## # A tibble: 2 x 2
##   driedsample_readyforshipping     n
##   <chr>                        <int>
## 1 yes                             71
## 2 <NA>                           131

Analyzed vs not analyzed by date

# Number of samples per collection date and analysis status.
# The result stays grouped by Date_collected (summarise() default).
ncount_bydate <- dplyr::summarise(
  group_by(sample_status, Date_collected, data),
  n = n()
)
## `summarise()` has grouped output by 'Date_collected'. You can override using the
## `.groups` argument.
print(ncount_bydate)
## # A tibble: 13 x 3
## # Groups:   Date_collected [8]
##    Date_collected data             n
##    <chr>          <chr>        <int>
##  1 13-Jul-21      analyzed        18
##  2 14-Jul-21      analyzed        15
##  3 14-Jul-21      not analyzed     4
##  4 24-Jul-21      not analyzed    54
##  5 30-Jul-21      analyzed         2
##  6 30-Jul-21      not analyzed    18
##  7 31-Jul-21      analyzed         2
##  8 31-Jul-21      not analyzed    16
##  9 6-Jul-21       analyzed        16
## 10 6-Jul-21       not analyzed     1
## 11 7-Aug-21       analyzed        18
## 12 7-Aug-21       not analyzed    22
## 13 7-Jul-21       analyzed        15
# Number of samples per collection date and dried-sample flag.
# The result stays grouped by Date_collected (summarise() default).
ncount_bydate_driedsamplesatUTK <- dplyr::summarise(
  group_by(
    sample_status_readytogosamples,
    Date_collected, driedsample_readyforshipping
  ),
  n = n()
)
## `summarise()` has grouped output by 'Date_collected'. You can override using the
## `.groups` argument.
print(ncount_bydate_driedsamplesatUTK)
## # A tibble: 15 x 3
## # Groups:   Date_collected [8]
##    Date_collected driedsample_readyforshipping     n
##    <chr>          <chr>                        <int>
##  1 13-Jul-21      yes                             16
##  2 13-Jul-21      <NA>                             2
##  3 14-Jul-21      yes                             12
##  4 14-Jul-21      <NA>                             8
##  5 24-Jul-21      <NA>                            54
##  6 30-Jul-21      yes                              3
##  7 30-Jul-21      <NA>                            17
##  8 31-Jul-21      yes                              2
##  9 31-Jul-21      <NA>                            16
## 10 6-Jul-21       yes                             11
## 11 6-Jul-21       <NA>                             6
## 12 7-Aug-21       yes                             13
## 13 7-Aug-21       <NA>                            27
## 14 7-Jul-21       yes                             14
## 15 7-Jul-21       <NA>                             1

By site

# Number of samples per Area, Site and analysis status.
# The result stays grouped by Area and Site (summarise() default).
ncount_bysite <- dplyr::summarise(
  group_by(sample_status, Area, Site, data),
  n = n()
)
## `summarise()` has grouped output by 'Area', 'Site'. You can override using the
## `.groups` argument.
print(ncount_bysite)
## # A tibble: 14 x 4
## # Groups:   Area, Site [8]
##    Area              Site     data             n
##    <chr>             <chr>    <chr>        <int>
##  1 acidic tundra     Dry      analyzed        18
##  2 acidic tundra     Dry      not analyzed     6
##  3 acidic tundra     Hydric   analyzed        12
##  4 acidic tundra     Hydric   not analyzed    15
##  5 acidic tundra     Mesic    analyzed        15
##  6 acidic tundra     Mesic    not analyzed    11
##  7 acidic tundra     Transect not analyzed    27
##  8 non-acidic tundra Dry      analyzed        12
##  9 non-acidic tundra Dry      not analyzed    10
## 10 non-acidic tundra Hydric   analyzed        15
## 11 non-acidic tundra Hydric   not analyzed     9
## 12 non-acidic tundra Mesic    analyzed        14
## 13 non-acidic tundra Mesic    not analyzed    10
## 14 non-acidic tundra Transect not analyzed    27
# Number of samples per Area, Site and dried-sample flag.
# The result stays grouped by Area and Site (summarise() default).
ncount_bysite_driedsamplesatUTK <- dplyr::summarise(
  group_by(
    sample_status_readytogosamples,
    Area, Site, driedsample_readyforshipping
  ),
  n = n()
)
## `summarise()` has grouped output by 'Area', 'Site'. You can override using the
## `.groups` argument.
print(ncount_bysite_driedsamplesatUTK)
## # A tibble: 14 x 4
## # Groups:   Area, Site [8]
##    Area              Site     driedsample_readyforshipping     n
##    <chr>             <chr>    <chr>                        <int>
##  1 acidic tundra     Dry      yes                             11
##  2 acidic tundra     Dry      <NA>                            13
##  3 acidic tundra     Hydric   yes                              9
##  4 acidic tundra     Hydric   <NA>                            19
##  5 acidic tundra     Mesic    yes                             13
##  6 acidic tundra     Mesic    <NA>                            13
##  7 acidic tundra     Transect <NA>                            27
##  8 non-acidic tundra Dry      yes                             14
##  9 non-acidic tundra Dry      <NA>                             8
## 10 non-acidic tundra Hydric   yes                             14
## 11 non-acidic tundra Hydric   <NA>                            10
## 12 non-acidic tundra Mesic    yes                             10
## 13 non-acidic tundra Mesic    <NA>                            14
## 14 non-acidic tundra Transect <NA>                            27

By soil material

# Number of samples per soil material and analysis status.
# The result stays grouped by soil_material (summarise() default).
ncount_bymaterial <- dplyr::summarise(
  group_by(sample_status, soil_material, data),
  n = n()
)
## `summarise()` has grouped output by 'soil_material'. You can override using the
## `.groups` argument.
print(ncount_bymaterial)
## # A tibble: 4 x 3
## # Groups:   soil_material [2]
##   soil_material data             n
##   <chr>         <chr>        <int>
## 1 mineral       analyzed        13
## 2 mineral       not analyzed    11
## 3 organic       analyzed        73
## 4 organic       not analyzed   104
# Number of samples per soil material and dried-sample flag.
# The result stays grouped by soil_material (summarise() default).
ncount_bymaterial_driedsamplesatUTK <- dplyr::summarise(
  group_by(
    sample_status_readytogosamples,
    soil_material, driedsample_readyforshipping
  ),
  n = n()
)
## `summarise()` has grouped output by 'soil_material'. You can override using the
## `.groups` argument.
print(ncount_bymaterial_driedsamplesatUTK)
## # A tibble: 4 x 3
## # Groups:   soil_material [2]
##   soil_material driedsample_readyforshipping     n
##   <chr>         <chr>                        <int>
## 1 mineral       yes                             13
## 2 mineral       <NA>                            11
## 3 organic       yes                             58
## 4 organic       <NA>                           120

By Horizon

# Number of samples per Horizon and analysis status.
# The result stays grouped by Horizon (summarise() default).
ncount_byhorizon <- dplyr::summarise(
  group_by(sample_status, Horizon, data),
  n = n()
)
## `summarise()` has grouped output by 'Horizon'. You can override using the
## `.groups` argument.
print(ncount_byhorizon)
## # A tibble: 11 x 3
## # Groups:   Horizon [7]
##    Horizon data             n
##    <chr>   <chr>        <int>
##  1 M       analyzed        11
##  2 M       not analyzed    11
##  3 M1      analyzed         1
##  4 M2      analyzed         1
##  5 O       analyzed        13
##  6 O       not analyzed     7
##  7 O1      analyzed        30
##  8 O1      not analyzed    48
##  9 O2      analyzed        30
## 10 O2      not analyzed    48
## 11 O3      not analyzed     1
# Number of samples per Horizon and dried-sample flag.
# The result stays grouped by Horizon (summarise() default).
ncount_byhorizon_driedsamplesatUTK <- dplyr::summarise(
  group_by(
    sample_status_readytogosamples,
    Horizon, driedsample_readyforshipping
  ),
  n = n()
)
## `summarise()` has grouped output by 'Horizon'. You can override using the
## `.groups` argument.
print(ncount_byhorizon_driedsamplesatUTK)
## # A tibble: 11 x 3
## # Groups:   Horizon [7]
##    Horizon driedsample_readyforshipping     n
##    <chr>   <chr>                        <int>
##  1 M       yes                             11
##  2 M       <NA>                            11
##  3 M1      yes                              1
##  4 M2      yes                              1
##  5 O       yes                              9
##  6 O       <NA>                            11
##  7 O1      yes                             22
##  8 O1      <NA>                            56
##  9 O2      yes                             27
## 10 O2      <NA>                            52
## 11 O3      <NA>                             1