This is a data explainer that pulls in the necessary data files as well as associated cleaning and grouping functions. Functions can be edited in the “code/functions_for_modeldatacomparisons_dataexplainer.R” script.
View the easy-to-read published Markdown file here.
There are two types of information associated with sample location: the moisture conditions and the tundra type. Sometimes the tundra type (non-acidic or acidic) is referred to as “area” and sometimes it’s referred to as “site.” Similarly, moisture conditions (dry, mesic, and hydric) are sometimes called “site” and sometimes called “position”.
There are issues with capitalization. If dataframes aren’t combining, there’s a good chance that the issue is “Dry” and “dry” failing to match during the merge.
Sometimes tundra types are referred to as “west” and “east”. West corresponds to non-acidic tundra and east corresponds to acidic tundra. This issue is the result of my data processing being focused on the figures/tables as output.
I will add in the following datasets as we get them
Ferrous iron
Soil texture
rhizon ICP data for 2022
I need to add in the following datasets that we do have
# Default chunk option: echo the R code of each chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
# Sourced in this order; presumably the first script attaches the packages
# (pipes, ggplot2, palettes) the second one relies on -- confirm before reordering.
source("code/0-packages.R")
# Cleaning and grouping functions called throughout this explainer.
source("code/functions_for_modeldatacomparisons_dataexplainer.R")
Redox data with artifacts and frozen values removed and with hydrogen electrode correction
# Same chunk option as the setup chunk: echo code in the rendered document.
knitr::opts_chunk$set(echo = TRUE)
# One redox CSV per field season (2022 first, then 2021).
redox_withdepths2022 <- read.csv(file = "processed/allcombine_2022_frozen.csv")
redox_withdepths2021 <- read.csv(file = "processed/allcombine_2021_frozen.csv")
data with artifacts and frozen values removed and with hydrogen electrode correction
# Temperature / salinity / moisture averages, one processed CSV per year.
temp_sal_moist2021 <- read.csv(file = "processed/final_temp_salinity_avgs.csv")
temp_sal_moist2022 <- read.csv(file = "processed/2022final_temp_salinity_avgs.csv")
# Thaw depth, read from the raw files for each year.
thaw_depth2021 <- read.csv(file = "raw/thaw_depth_2021.csv")
thaw_depth2022 <- read.csv(file = "raw/thaw_depth_2022.csv")
# 2021 soil inventory (input to the soil properties cleaning function).
bd_grav <- read.csv(file = "raw/PhIr2021_Soil_Inventory_bd.csv")
All porewater data below is in a long format. I can include a function for pivoting wider if that’s helpful.
# 2022: sipper porewater samples (file includes sample depths)
porewater_ICP_2022 <- read.csv(file = "processed/ICP_2022.csv")
# 2021: rhizon samplers (only one depth)
porewater_ICP_2021 <- read.csv(file = "processed/rhizon_2021.csv")
Everything must be run sequentially
# Step 1: remove frozen data
# (column avg_values_fixed = redox potential, mV)
redox_unfrozen_2021 <- redox2021_remove_frozenpoints_function(redox_withdepths2021)
redox_unfrozen_2022 <- redox2022_remove_frozenpoints_function(redox_withdepths2022)
# Step 2: group by season
# (column redox_avg_mV = redox potential, mV)
redox_grouped_2021 <- redox_2021_groupbytemporal_function(redox_unfrozen_2021)
## `summarise()` has grouped output by 'site', 'position', 'depth_cm'. You can
## override using the `.groups` argument.
redox_grouped_2022 <- redox_2022_groupbytemporal_function(redox_unfrozen_2022)
## `summarise()` has grouped output by 'site', 'position', 'depth_cm'. You can
## override using the `.groups` argument.
# figure test: 2021 redox potential against depth, one panel per
# position (rows) x site (columns), one colour per season
redox_grouped_2021 %>%
  mutate(
    # explicit level order controls the legend/palette order and the panel order
    month = factor(month, levels = c("early summer", "mid summer", "late summer", "early fall")),
    site = factor(site, levels = c("non-acidic tundra", "acidic tundra"))
  ) %>%
  # The previous `group = 'position'` was passed to ggplot() outside aes(), where
  # it is silently ignored; it has been dropped. Lines are still grouped by the
  # discrete colour/fill mapping (month) within each facet panel.
  ggplot(aes(y = depth_cm, x = redox_avg_mV, color = month, fill = month)) +
  geom_point(size = 3, alpha = 0.8, shape = 21) +
  geom_line(orientation = "y", show.legend = FALSE, linetype = "longdash") +
  geom_errorbar(
    aes(xmin = redox_avg_mV - redox_sd, xmax = redox_avg_mV + redox_sd),
    show.legend = FALSE
  ) +
  scale_color_manual(values = pnw_palette("Sunset", 4)) +
  scale_fill_manual(values = pnw_palette("Sunset", 4)) +
  # limits given largest-first so the depth axis is reversed (0 at the top)
  ylim(60, 0) +
  labs(
    x = "2021\nredox potential (mV)",
    y = "",
    color = "", fill = ""
  ) +
  scale_x_continuous(position = "top") +
  facet_grid(position ~ site, switch = "x") +
  theme_er1() +
  theme(
    legend.position = "right",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    strip.placement = "outside",
    axis.text.y = element_blank(), # remove y axis labels
    axis.ticks.y = element_blank(), # remove y axis ticks
    axis.text.x = element_text(size = 7.5, hjust = 0.8, vjust = 0.2, angle = 90)
  )
# figure test passed. Data is good to go.
# Step 1: clean up dates, note data as frozen if below -1 C
moisturetempsal2021_cleaned <- moisturetempsal_2021_cleaningfunction(temp_sal_moist2021)
moisturetempsal2022_cleaned <- moisturetempsal_2022_cleaningfunction(temp_sal_moist2022)
# Step 2: grouped temporal data
moisturetempsal2021_grouped <- moisturetempsal_2021_groupfunction(moisturetempsal2021_cleaned)
## `summarise()` has grouped output by 'site', 'position', 'depth_cm'. You can
## override using the `.groups` argument.
moisturetempsal2022_grouped <- moisturetempsal_2022_groupfunction(moisturetempsal2022_cleaned)
## `summarise()` has grouped output by 'site', 'position', 'depth_cm'. You can
## override using the `.groups` argument.
# figure test: 2021 soil moisture against depth, one panel per
# position (rows) x site (columns), one colour per season
moisturetempsal2021_grouped %>%
  mutate(
    # explicit level order controls the palette order and the panel order
    month = factor(month, levels = c("early summer", "mid summer", "late summer", "early fall")),
    site = factor(site, levels = c("non-acidic tundra", "acidic tundra"))
  ) %>%
  # The previous `group = 'month'` was passed to ggplot() outside aes(), where it
  # is silently ignored; it has been dropped. Grouping by month already comes
  # from the discrete colour/fill mapping.
  ggplot(aes(y = depth_cm, x = moisture_avg, color = month, fill = month)) +
  geom_point(size = 4, alpha = 0.8, shape = 21) +
  geom_line(orientation = "y", show.legend = FALSE, linetype = "longdash") +
  geom_errorbar(
    aes(xmin = moisture_avg - moisture_sd, xmax = moisture_avg + moisture_sd),
    show.legend = FALSE
  ) +
  scale_color_manual(values = pnw_palette("Sunset", 4)) +
  scale_fill_manual(values = pnw_palette("Sunset", 4)) +
  # limits given largest-first so the depth axis is reversed (0 at the top)
  ylim(60, 0) +
  labs(
    x = "2021\nsoil moisture (%)",
    y = "depth (cm)",
    color = "", fill = ""
  ) +
  scale_x_continuous(position = "top") +
  facet_grid(position ~ site, switch = "x") +
  theme_er1() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    strip.placement = "outside",
    strip.text.y = element_blank(), # hide the row (position) strip labels
    axis.text.x = element_text(size = 7.5, hjust = 0.8, vjust = 0.2, angle = 90)
  )
# Cleaning: clean up data, rename areas for consistency, calculate volumetric
# moisture, simplify soil material type
bd_cleaned <- soil_properties_cleaningfunction(bd_grav)
# Grouping: select only necessary columns, simplify date-plot labels, set levels
# for material/horizons and date-plots, group by Area, Site, Horizon and
# summarise mean and sd for bulk density and volumetric water content
bd_grouped <- soil_properties_groupingfunction(bd_cleaned)
## `summarise()` has grouped output by 'Area', 'Site'. You can override using the
## `.groups` argument.
# thaw depth cleaning, one helper per year
thawdepths_2021_cleaned <- thawdepths_2021_cleaningfunction(thaw_depth2021)
thawdepths_2022_cleaned <- thawdepths_2022_cleaningfunction(thaw_depth2022)
# thaw depth figure test: one violin per sampling date,
# panels by Site (rows) and Area (columns)
thawdepths_2021_cleaned %>%
  mutate(Site = factor(Site, levels = c("Dry", "Mesic", "Hydric"))) %>%
  #filter(Area == "non-acidic tundra") %>%
  ggplot() +
  geom_violin(
    aes(
      x = as.Date(date2),
      y = thaw_depth_cm,
      group = as.Date(date2),
      fill = Site
    ),
    alpha = 0.4
  ) +
  labs(x = " ", y = "Thaw Depth, cm") +
  #geom_rect(aes(xmin=as_date('2021-06-15'), xmax= as_date('2021-08-09'), ymin=49.5, ymax=50.5), fill = "black")+
  #scale_fill_gradientn(colors = natparks.pals(name = "Banff"))+
  scale_fill_manual(values = c("#9a031e", "#40916c", "#118ab2")) +
  scale_x_date(date_breaks = "1 week", date_labels = "%b-%d-%Y") +
  ylim(90, 0) +
  facet_grid(Site ~ Area) +
  theme_er1() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(vjust = 0.5, hjust = 1, angle = 90, size = 9)
  )
## Warning: Removed 1 rows containing non-finite values (stat_ydensity).
#bulk density figure test
# Cleaning (2021 rhizon data only; the 2022 ICP sipper porewater data comes pre-cleaned)
porewater_ICP_2021_cleaned <- rhizon_cleaningfunction(porewater_ICP_2021)
# Grouping
porewater_ICP_2021_grouped <- rhizon_groupingfunction(porewater_ICP_2021_cleaned)
## `summarise()` has grouped output by 'Area', 'Site', 'month', 'Betterdate'. You
## can override using the `.groups` argument.
porewater_ICP_2022_grouped <- sipper_groupingfunction(porewater_ICP_2022)
## `summarise()` has grouped output by 'Area', 'Site', 'Depth_cm', 'date'. You can
## override using the `.groups` argument.
# rhizon 2021 test figure: mean calcium concentration over time,
# coloured by Site, with line type and point shape by Area
porewater_ICP_2021_grouped %>%
  mutate(
    # translate the West/East labels to tundra types, then fix the level order
    Area = recode(
      Area,
      "West" = "non-acidic tundra",
      "East" = "acidic tundra"
    ),
    Area = factor(Area, levels = c("non-acidic tundra", "acidic tundra"))
  ) %>%
  filter(ICP %in% c("Ca_ug/mL")) %>%
  # mutate(ICP = recode(ICP, "calcium" = "calcium μg/mL",
  # "iron" = "iron μg/mL",
  # "aluminum" = "aluminum μg/mL",
  # "phosphorus" = "phosphorus μg/mL")) %>%
  ggplot(aes(x = Betterdate, y = mean, color = Site)) +
  geom_line(
    aes(group = area_site, linetype = Area),
    size = 0.7,
    orientation = "x"
  ) +
  geom_point(aes(group = area_site, shape = Area), size = 3.5) +
  # scale_fill_gradientn(colors = (pnw_palette("Shuksan2")))+
  # scale_color_gradientn(colors = (pnw_palette("Shuksan2")))+
  scale_fill_manual(
    values = c("#f07167", "#a7c957", "#1e96fc", "#f07167", "#a7c957", "#1e96fc")
  ) +
  scale_color_manual(
    values = c("#f07167", "#a7c957", "#1e96fc", "#f07167", "#a7c957", "#1e96fc")
  ) +
  labs(
    color = "Moisture",
    linetype = "Acidity",
    y = "Concentration μg/mL",
    x = " "
  ) +
  facet_grid(. ~ ICP, scales = "free_y") +
  theme_er1() +
  theme(
    axis.text.x = element_text(size = 8, hjust = 0.25, vjust = 0.2, angle = 45),
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
# sipper 2022 test figure: mean iron concentration against depth, one profile
# per date_plot value, panels by Site (rows) and Area (columns)
porewater_ICP_2022_grouped %>%
  filter(ICP == "Fe_ug_mL") %>%
  mutate(
    # explicit level order fixes the panel order
    Area = factor(Area, levels = c("non-acidic tundra", "acidic tundra")),
    Site = factor(Site, levels = c("Dry", "Mesic", "Hydric"))
  ) %>%
  ggplot(aes(x = mean, y = Depth_cm, fill = date_plot, group = date_plot)) +
  geom_errorbar(aes(xmin = mean - sd, xmax = mean + sd, color = date_plot)) +
  geom_point(size = 3.5, shape = 21, alpha = 0.6) +
  geom_line(aes(color = date_plot), orientation = "y", linetype = "longdash") +
  # scale_fill_gradientn(colors = (pnw_palette("Shuksan2")))+
  # scale_color_gradientn(colors = (pnw_palette("Shuksan2")))+
  scale_fill_manual(values = pnw_palette("Sunset2", 15)) +
  scale_color_manual(values = pnw_palette("Sunset2", 15)) +
  labs(
    fill = "Date",
    color = "Date",
    y = "",
    x = "Iron ug/mL"
  ) +
  scale_y_reverse() +
  scale_x_continuous(position = "top") +
  facet_grid(Site ~ Area, scales = "free_x") +
  theme_er1() +
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    strip.placement = "outside"
  )
I usually analyze the data in the same script where I’m doing data processing and cleaning, so these grouped and cleaned datasets do not always have corresponding output files (other than the figures and tables). I think the best approach is to create a modeling data folder with output files from this data explainer (see the code chunk below).
It will take some communication (I’ll have to let you know if there are any changes to processing code or data files so that you can re-run this markdown file). But this file is helpful to me too because it simplifies the processing that I’m doing, which will be helpful for visitors to my repository. So I have a vested interest in keeping it up to date. Another option is for me to figure out how to combine the markdown file with a Drake outline, but I’m not at that stage yet.
# Export every raw, cleaned, and grouped dataset created above as
# "modeling-data-output-files/<object name>.csv".
output_dir <- "modeling-data-output-files"
# write.csv() cannot create a missing folder, so make sure it exists first;
# this is a silent no-op when the folder is already there.
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

# Each list name doubles as the output file name (without the .csv extension).
modeling_outputs <- list(
  # Redox
  redox_withdepths2021 = redox_withdepths2021,
  redox_withdepths2022 = redox_withdepths2022,
  redox_unfrozen_2021 = redox_unfrozen_2021,
  redox_unfrozen_2022 = redox_unfrozen_2022,
  redox_grouped_2021 = redox_grouped_2021,
  redox_grouped_2022 = redox_grouped_2022,
  # Moisture Temperature Salinity
  temp_sal_moist2021 = temp_sal_moist2021,
  temp_sal_moist2022 = temp_sal_moist2022,
  moisturetempsal2021_cleaned = moisturetempsal2021_cleaned,
  moisturetempsal2022_cleaned = moisturetempsal2022_cleaned,
  moisturetempsal2021_grouped = moisturetempsal2021_grouped,
  moisturetempsal2022_grouped = moisturetempsal2022_grouped,
  # Porewater data
  porewater_ICP_2021 = porewater_ICP_2021,
  porewater_ICP_2022 = porewater_ICP_2022, # comes cleaned
  porewater_ICP_2021_cleaned = porewater_ICP_2021_cleaned,
  porewater_ICP_2021_grouped = porewater_ICP_2021_grouped,
  porewater_ICP_2022_grouped = porewater_ICP_2022_grouped,
  # Soil Properties
  thawdepths_2021_cleaned = thawdepths_2021_cleaned,
  thawdepths_2022_cleaned = thawdepths_2022_cleaned,
  bd_grav = bd_grav,
  bd_cleaned = bd_cleaned,
  bd_grouped = bd_grouped
)

# write.csv() defaults are kept on purpose (including the row-name column) so
# the files keep the same layout as previous exports.
for (dataset_name in names(modeling_outputs)) {
  write.csv(
    modeling_outputs[[dataset_name]],
    file.path(output_dir, paste0(dataset_name, ".csv"))
  )
}