packages
# Show each chunk's source code in the knitted output by default.
knitr::opts_chunk$set(echo = TRUE)
# Run the project script at code/0-packages.R (presumably attaches the
# packages used below, e.g. dplyr and ggplot2 -- confirm against that file).
source("code/0-packages.R")
data
# Raw inputs, read with paths relative to the working directory:
#   bd_grav_data - soil inventory table (bulk density / gravimetric water)
#   samples_utk  - air-dried soil sample table
bd_grav_data <- read.csv(file = "raw/PhIr2021_Soil_Inventory_bd.csv")
samples_utk <- read.csv(file = "raw/UG_PhIr_2021_airdrysoilsamples.csv")
Clean the data by replacing East/West in the Area column with acidic tundra and non-acidic tundra. Whenever the Horizon column contains an “O”, the soil_material column is marked as organic; likewise, “M” is marked as mineral. Calculate volumetric water content (vwc) from the gravimetric water content and bulk density data.
# Cleaned inventory table:
#   * Area: "East" -> "acidic tundra", "West" -> "non-acidic tundra"
#   * soil_material: "organic" when Horizon contains "O", "mineral" when it
#     contains "M" (checked in that order), NA otherwise
#   * volumetric_water_content_cm3_cm3: bulk density x gravimetric water
# (A previously disabled step dropped column X: dplyr::select(-c(X)).)
bd_grav_cleaned <-
  bd_grav_data %>%
  dplyr::mutate(
    Area = recode(
      Area,
      "East" = "acidic tundra",
      "West" = "non-acidic tundra"
    ),
    soil_material = case_when(
      grepl("O", Horizon) ~ "organic",
      grepl("M", Horizon) ~ "mineral"
    ),
    volumetric_water_content_cm3_cm3 =
      soil_bulk_density_g_cm3 * grav_water_gh20_per_gdrysoil
  )
# Flag every row of the air-dried sample table with "yes", so that after a
# join the flag distinguishes matched rows from unmatched (NA) ones.
samples_utk_forcombining <- mutate(
  samples_utk,
  driedsample_readyforshipping = "yes"
)
Reduce the data to the columns needed, and copy the Horizon column into a new label column stored as a factor with explicitly ordered levels.
# Keep only the identifier, location, depth and measurement columns, then add
# `label`: Horizon as a factor with a fixed level order (organic horizons
# first, then mineral).
bd_select <-
  bd_grav_cleaned %>%
  dplyr::select(
    Sample_ID, Core_ID, Date_collected,
    Area, Site, Plot_num, Plot_ID,
    Horizon,
    Depth_1_cm, Depth_2_cm, Depth_3_cm, Depth_4_cm,
    Average_Depth_cm, real_depth_cm,
    soil_bulk_density_g_cm3, volumetric_water_content_cm3_cm3,
    soil_material
  ) %>%
  mutate(
    label = factor(
      Horizon,
      levels = c("O", "O1", "O2", "O3", "M", "M1", "M2")
    )
  )
Differentiate samples that have been analyzed for vwc/bulk density from samples that haven’t. Replacing every NA with “not analyzed” is accurate, but it also converts the depth column from numeric to character; that is corrected later, when plotting.
# Mark each sample as "analyzed" or "not analyzed" for bulk density.
# Every NA in the table is first overwritten with the string "not analyzed";
# columns that contained NAs therefore become character (including the depth
# column, which the plotting code converts back with as.numeric()).
sample_status <-
  bd_select %>%
  replace(is.na(.), "not analyzed") %>%
  mutate(
    data = case_when(
      !grepl("not analyzed", soil_bulk_density_g_cm3) ~ "analyzed",
      TRUE ~ "not analyzed"
    )
  )
# Attach the dried-sample flag to every inventory row.
# Rows without a match in the UTK table get NA in
# `driedsample_readyforshipping`.
#
# The join key is stated explicitly instead of relying on dplyr's guess, and
# the UTK table is reduced to one row per sampleID first: a repeated sampleID
# there would otherwise duplicate the matching inventory row and inflate every
# count derived from this table.
sample_status_readytogosamples <-
  bd_select %>%
  rename(sampleID = Sample_ID) %>%
  left_join(
    samples_utk_forcombining %>%
      dplyr::distinct(sampleID, .keep_all = TRUE),
    by = "sampleID"
  )
## Joining, by = "sampleID"
# Depth vs. collection date for samples that have bulk density / volumetric
# water data, one panel per Site (rows) x Area (columns).
sample_status %>%
  # Keep analyzed samples whose depth is known.
  filter(data == "analyzed", real_depth_cm != "not analyzed") %>%
  mutate(
    # Depth may have been coerced to character upstream; restore numbers.
    real_depth_cm = as.numeric(real_depth_cm),
    Area = factor(Area, levels = c("non-acidic tundra", "acidic tundra")),
    Site = factor(Site, levels = c("Dry", "Mesic", "Hydric", "Transect")),
    # Chronological order for the x axis (plain text would sort alphabetically).
    Date_collected = factor(
      Date_collected,
      levels = c(
        "6-Jul-21", "7-Jul-21", "13-Jul-21", "14-Jul-21",
        "24-Jul-21", "30-Jul-21", "31-Jul-21", "7-Aug-21"
      )
    )
  ) %>%
  ggplot(aes(x = Date_collected, y = real_depth_cm, fill = data)) +
  geom_point(shape = 21, size = 3, alpha = 0.5) +
  scale_fill_manual(values = "#efc3e6") +
  labs(
    x = "date collected",
    y = "depth",
    fill = "bulk density & volumetric water"
  ) +
  facet_grid(Site ~ Area, scales = "free") +
  # Depth increases downwards.
  scale_y_reverse() +
  theme_er1() +
  theme(
    axis.text.x = element_text(vjust = 0.5, hjust = 1, angle = 90, size = 9),
    legend.position = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
# Depth vs. collection date for samples flagged as dried and ready for
# shipping, one panel per Site (rows) x Area (columns).
sample_status_readytogosamples %>%
  filter(driedsample_readyforshipping == "yes") %>%
  mutate(
    real_depth_cm = as.numeric(real_depth_cm),
    Area = factor(Area, levels = c("non-acidic tundra", "acidic tundra")),
    Site = factor(Site, levels = c("Dry", "Mesic", "Hydric", "Transect")),
    # Chronological order for the x axis (plain text would sort alphabetically).
    Date_collected = factor(
      Date_collected,
      levels = c(
        "6-Jul-21", "7-Jul-21", "13-Jul-21", "14-Jul-21",
        "24-Jul-21", "30-Jul-21", "31-Jul-21", "7-Aug-21"
      )
    )
  ) %>%
  ggplot(
    aes(
      x = Date_collected,
      y = real_depth_cm,
      fill = driedsample_readyforshipping
    )
  ) +
  geom_point(shape = 21, size = 3, alpha = 0.5) +
  scale_fill_manual(values = "#f4acb7") +
  labs(
    x = "date collected",
    y = "depth",
    fill = "dried samples at UTK"
  ) +
  facet_grid(Site ~ Area, scales = "free") +
  # Depth increases downwards.
  scale_y_reverse() +
  theme_er1() +
  theme(
    axis.text.x = element_text(vjust = 0.5, hjust = 1, angle = 90, size = 9),
    legend.position = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
## Warning: Removed 1 rows containing missing values (geom_point).
# Depth vs. collection date for samples that do not yet have bulk density /
# volumetric water data, one panel per Site (rows) x Area (columns).
sample_status %>%
  # Keep not-yet-analyzed samples whose depth is known.
  filter(data == "not analyzed", real_depth_cm != "not analyzed") %>%
  mutate(
    # Depth may have been coerced to character upstream; restore numbers.
    real_depth_cm = as.numeric(real_depth_cm),
    Area = factor(Area, levels = c("non-acidic tundra", "acidic tundra")),
    Site = factor(Site, levels = c("Dry", "Mesic", "Hydric", "Transect")),
    # Chronological order for the x axis (plain text would sort alphabetically).
    Date_collected = factor(
      Date_collected,
      levels = c(
        "6-Jul-21", "7-Jul-21", "13-Jul-21", "14-Jul-21",
        "24-Jul-21", "30-Jul-21", "31-Jul-21", "7-Aug-21"
      )
    )
  ) %>%
  ggplot(aes(x = Date_collected, y = real_depth_cm, fill = data)) +
  geom_point(shape = 21, size = 3, alpha = 0.5) +
  scale_fill_manual(values = "#b8bedd") +
  labs(
    x = "date collected",
    y = "depth",
    fill = "bulk density & volumetric water"
  ) +
  facet_grid(Site ~ Area, scales = "free") +
  # Depth increases downwards.
  scale_y_reverse() +
  theme_er1() +
  theme(
    axis.text.x = element_text(vjust = 0.5, hjust = 1, angle = 90, size = 9),
    legend.position = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
# Depth vs. collection date for samples with no dried-sample record (the flag
# is NA because the sample had no match in the UTK table), one panel per
# Site (rows) x Area (columns).
sample_status_readytogosamples %>%
  filter(is.na(driedsample_readyforshipping)) %>%
  # Give the missing flag an explicit label. A manual fill scale does not apply
  # `values` to NA, so the grey chosen below would be ignored in favour of the
  # scale's default NA colour and the legend key would read "NA".
  mutate(driedsample_readyforshipping = "no") %>%
  mutate(
    real_depth_cm = as.numeric(real_depth_cm),
    Area = factor(Area, levels = c("non-acidic tundra", "acidic tundra")),
    Site = factor(Site, levels = c("Dry", "Mesic", "Hydric", "Transect")),
    # Chronological order for the x axis (plain text would sort alphabetically).
    Date_collected = factor(
      Date_collected,
      levels = c(
        "6-Jul-21", "7-Jul-21", "13-Jul-21", "14-Jul-21",
        "24-Jul-21", "30-Jul-21", "31-Jul-21", "7-Aug-21"
      )
    )
  ) %>%
  ggplot(
    aes(
      x = Date_collected,
      y = real_depth_cm,
      fill = driedsample_readyforshipping
    )
  ) +
  geom_point(shape = 21, size = 3, alpha = 0.5) +
  scale_fill_manual(values = c("no" = "#d3d3d3")) +
  labs(
    x = "date collected",
    y = "depth",
    fill = "dried samples at UTK"
  ) +
  facet_grid(Site ~ Area, scales = "free") +
  # Depth increases downwards.
  scale_y_reverse() +
  theme_er1() +
  theme(
    axis.text.x = element_text(vjust = 0.5, hjust = 1, angle = 90, size = 9),
    legend.position = "top",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
## Warning: Removed 2 rows containing missing values (geom_point).
Analyzed vs not analyzed
# Number of samples in each analysis status ("analyzed" / "not analyzed").
ncount <-
  sample_status %>%
  group_by(data) %>%
  tally()
print(ncount)
## # A tibble: 2 x 2
## data n
## <chr> <int>
## 1 analyzed 86
## 2 not analyzed 115
# Number of samples with ("yes") and without (NA) a dried-sample record.
ncount_driedsamplesatUTK <-
  sample_status_readytogosamples %>%
  group_by(driedsample_readyforshipping) %>%
  tally()
print(ncount_driedsamplesatUTK)
## # A tibble: 2 x 2
## driedsample_readyforshipping n
## <chr> <int>
## 1 yes 71
## 2 <NA> 131
Analyzed vs not analyzed by date
# Number of samples per collection date and analysis status.
# `.groups = "drop"` returns an ungrouped table, so later verbs applied to it
# are not silently evaluated per Date_collected, and summarise() no longer
# prints its regrouping message.
ncount_bydate <-
  sample_status %>%
  group_by(Date_collected, data) %>%
  dplyr::summarise(n = n(), .groups = "drop")
## `summarise()` has grouped output by 'Date_collected'. You can override using the
## `.groups` argument.
print(ncount_bydate)
## # A tibble: 13 x 3
## # Groups: Date_collected [8]
## Date_collected data n
## <chr> <chr> <int>
## 1 13-Jul-21 analyzed 18
## 2 14-Jul-21 analyzed 15
## 3 14-Jul-21 not analyzed 4
## 4 24-Jul-21 not analyzed 54
## 5 30-Jul-21 analyzed 2
## 6 30-Jul-21 not analyzed 18
## 7 31-Jul-21 analyzed 2
## 8 31-Jul-21 not analyzed 16
## 9 6-Jul-21 analyzed 16
## 10 6-Jul-21 not analyzed 1
## 11 7-Aug-21 analyzed 18
## 12 7-Aug-21 not analyzed 22
## 13 7-Jul-21 analyzed 15
# Number of samples per collection date and dried-sample flag ("yes" / NA).
# `.groups = "drop"` returns an ungrouped table, so later verbs applied to it
# are not silently evaluated per Date_collected, and summarise() no longer
# prints its regrouping message.
ncount_bydate_driedsamplesatUTK <-
  sample_status_readytogosamples %>%
  group_by(Date_collected, driedsample_readyforshipping) %>%
  dplyr::summarise(n = n(), .groups = "drop")
## `summarise()` has grouped output by 'Date_collected'. You can override using the
## `.groups` argument.
print(ncount_bydate_driedsamplesatUTK)
## # A tibble: 15 x 3
## # Groups: Date_collected [8]
## Date_collected driedsample_readyforshipping n
## <chr> <chr> <int>
## 1 13-Jul-21 yes 16
## 2 13-Jul-21 <NA> 2
## 3 14-Jul-21 yes 12
## 4 14-Jul-21 <NA> 8
## 5 24-Jul-21 <NA> 54
## 6 30-Jul-21 yes 3
## 7 30-Jul-21 <NA> 17
## 8 31-Jul-21 yes 2
## 9 31-Jul-21 <NA> 16
## 10 6-Jul-21 yes 11
## 11 6-Jul-21 <NA> 6
## 12 7-Aug-21 yes 13
## 13 7-Aug-21 <NA> 27
## 14 7-Jul-21 yes 14
## 15 7-Jul-21 <NA> 1
By site
# Number of samples per Area, Site and analysis status.
# `.groups = "drop"` returns an ungrouped table, so later verbs applied to it
# are not silently evaluated per Area/Site, and summarise() no longer prints
# its regrouping message.
ncount_bysite <-
  sample_status %>%
  group_by(Area, Site, data) %>%
  dplyr::summarise(n = n(), .groups = "drop")
## `summarise()` has grouped output by 'Area', 'Site'. You can override using the
## `.groups` argument.
print(ncount_bysite)
## # A tibble: 14 x 4
## # Groups: Area, Site [8]
## Area Site data n
## <chr> <chr> <chr> <int>
## 1 acidic tundra Dry analyzed 18
## 2 acidic tundra Dry not analyzed 6
## 3 acidic tundra Hydric analyzed 12
## 4 acidic tundra Hydric not analyzed 15
## 5 acidic tundra Mesic analyzed 15
## 6 acidic tundra Mesic not analyzed 11
## 7 acidic tundra Transect not analyzed 27
## 8 non-acidic tundra Dry analyzed 12
## 9 non-acidic tundra Dry not analyzed 10
## 10 non-acidic tundra Hydric analyzed 15
## 11 non-acidic tundra Hydric not analyzed 9
## 12 non-acidic tundra Mesic analyzed 14
## 13 non-acidic tundra Mesic not analyzed 10
## 14 non-acidic tundra Transect not analyzed 27
# Number of samples per Area, Site and dried-sample flag ("yes" / NA).
# `.groups = "drop"` returns an ungrouped table, so later verbs applied to it
# are not silently evaluated per Area/Site, and summarise() no longer prints
# its regrouping message.
ncount_bysite_driedsamplesatUTK <-
  sample_status_readytogosamples %>%
  group_by(Area, Site, driedsample_readyforshipping) %>%
  dplyr::summarise(n = n(), .groups = "drop")
## `summarise()` has grouped output by 'Area', 'Site'. You can override using the
## `.groups` argument.
print(ncount_bysite_driedsamplesatUTK)
## # A tibble: 14 x 4
## # Groups: Area, Site [8]
## Area Site driedsample_readyforshipping n
## <chr> <chr> <chr> <int>
## 1 acidic tundra Dry yes 11
## 2 acidic tundra Dry <NA> 13
## 3 acidic tundra Hydric yes 9
## 4 acidic tundra Hydric <NA> 19
## 5 acidic tundra Mesic yes 13
## 6 acidic tundra Mesic <NA> 13
## 7 acidic tundra Transect <NA> 27
## 8 non-acidic tundra Dry yes 14
## 9 non-acidic tundra Dry <NA> 8
## 10 non-acidic tundra Hydric yes 14
## 11 non-acidic tundra Hydric <NA> 10
## 12 non-acidic tundra Mesic yes 10
## 13 non-acidic tundra Mesic <NA> 14
## 14 non-acidic tundra Transect <NA> 27
By soil material
# Number of samples per soil material and analysis status.
# `.groups = "drop"` returns an ungrouped table, so later verbs applied to it
# are not silently evaluated per soil_material, and summarise() no longer
# prints its regrouping message.
ncount_bymaterial <-
  sample_status %>%
  group_by(soil_material, data) %>%
  dplyr::summarise(n = n(), .groups = "drop")
## `summarise()` has grouped output by 'soil_material'. You can override using the
## `.groups` argument.
print(ncount_bymaterial)
## # A tibble: 4 x 3
## # Groups: soil_material [2]
## soil_material data n
## <chr> <chr> <int>
## 1 mineral analyzed 13
## 2 mineral not analyzed 11
## 3 organic analyzed 73
## 4 organic not analyzed 104
# Number of samples per soil material and dried-sample flag ("yes" / NA).
# `.groups = "drop"` returns an ungrouped table, so later verbs applied to it
# are not silently evaluated per soil_material, and summarise() no longer
# prints its regrouping message.
ncount_bymaterial_driedsamplesatUTK <-
  sample_status_readytogosamples %>%
  group_by(soil_material, driedsample_readyforshipping) %>%
  dplyr::summarise(n = n(), .groups = "drop")
## `summarise()` has grouped output by 'soil_material'. You can override using the
## `.groups` argument.
print(ncount_bymaterial_driedsamplesatUTK)
## # A tibble: 4 x 3
## # Groups: soil_material [2]
## soil_material driedsample_readyforshipping n
## <chr> <chr> <int>
## 1 mineral yes 13
## 2 mineral <NA> 11
## 3 organic yes 58
## 4 organic <NA> 120
By Horizon
# Number of samples per Horizon and analysis status.
# `.groups = "drop"` returns an ungrouped table, so later verbs applied to it
# are not silently evaluated per Horizon, and summarise() no longer prints its
# regrouping message.
ncount_byhorizon <-
  sample_status %>%
  group_by(Horizon, data) %>%
  dplyr::summarise(n = n(), .groups = "drop")
## `summarise()` has grouped output by 'Horizon'. You can override using the
## `.groups` argument.
print(ncount_byhorizon)
## # A tibble: 11 x 3
## # Groups: Horizon [7]
## Horizon data n
## <chr> <chr> <int>
## 1 M analyzed 11
## 2 M not analyzed 11
## 3 M1 analyzed 1
## 4 M2 analyzed 1
## 5 O analyzed 13
## 6 O not analyzed 7
## 7 O1 analyzed 30
## 8 O1 not analyzed 48
## 9 O2 analyzed 30
## 10 O2 not analyzed 48
## 11 O3 not analyzed 1
# Number of samples per Horizon and dried-sample flag ("yes" / NA).
# `.groups = "drop"` returns an ungrouped table, so later verbs applied to it
# are not silently evaluated per Horizon, and summarise() no longer prints its
# regrouping message.
ncount_byhorizon_driedsamplesatUTK <-
  sample_status_readytogosamples %>%
  group_by(Horizon, driedsample_readyforshipping) %>%
  dplyr::summarise(n = n(), .groups = "drop")
## `summarise()` has grouped output by 'Horizon'. You can override using the
## `.groups` argument.
print(ncount_byhorizon_driedsamplesatUTK)
## # A tibble: 11 x 3
## # Groups: Horizon [7]
## Horizon driedsample_readyforshipping n
## <chr> <chr> <int>
## 1 M yes 11
## 2 M <NA> 11
## 3 M1 yes 1
## 4 M2 yes 1
## 5 O yes 9
## 6 O <NA> 11
## 7 O1 yes 22
## 8 O1 <NA> 56
## 9 O2 yes 27
## 10 O2 <NA> 52
## 11 O3 <NA> 1