library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(ggstatsplot)
## You can cite this package as:
## Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
## Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(extrafont)
## Registering fonts with R
Noramlity Test
Read file
soil_data <- read_excel("RawdataF.xlsx", sheet = "SOIL_DATA")
print(soil_data)
## # A tibble: 46 × 10
## Code Land_Use Replication Depth pH clay silt sand som cu
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A1 Farm 1 0-20 5.05 5.74 9.75 84.5 9.30 0.762
## 2 B1 Farm 2 0-20 5.08 4.24 1 94.8 6.40 0.179
## 3 C1 Farm 3 0-20 4.29 14.7 29.8 55.5 10.1 0.841
## 4 D1 Farm 4 0-20 4.54 13.2 14.8 72.0 8.93 0.646
## 5 E1 Farm 5 0-20 4.34 7.47 12.8 79.8 9.17 0.411
## 6 F1 Farm 6 0-20 4.31 4.98 12.2 82.8 5.50 0.365
## 7 G1 Farm 7 0-20 4.39 5.73 6.75 87.5 8.88 0.387
## 8 H1 Farm 8 0-20 4.47 3.98 1.5 94.5 6.45 0.256
## 9 HI1 Farm 9 0-20 4.61 8.98 17 74.0 7.97 0.539
## 10 J1 Farm 10 0-20 4.84 6.46 13.5 80.0 9.26 0.858
## # ℹ 36 more rows
Filter 0-20
# Filter for depth 0–20 cm
soil_data_filtered <- soil_data %>% filter(Depth == "0-20")
print(soil_data_filtered)
## # A tibble: 23 × 10
## Code Land_Use Replication Depth pH clay silt sand som cu
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A1 Farm 1 0-20 5.05 5.74 9.75 84.5 9.30 0.762
## 2 B1 Farm 2 0-20 5.08 4.24 1 94.8 6.40 0.179
## 3 C1 Farm 3 0-20 4.29 14.7 29.8 55.5 10.1 0.841
## 4 D1 Farm 4 0-20 4.54 13.2 14.8 72.0 8.93 0.646
## 5 E1 Farm 5 0-20 4.34 7.47 12.8 79.8 9.17 0.411
## 6 F1 Farm 6 0-20 4.31 4.98 12.2 82.8 5.50 0.365
## 7 G1 Farm 7 0-20 4.39 5.73 6.75 87.5 8.88 0.387
## 8 H1 Farm 8 0-20 4.47 3.98 1.5 94.5 6.45 0.256
## 9 HI1 Farm 9 0-20 4.61 8.98 17 74.0 7.97 0.539
## 10 J1 Farm 10 0-20 4.84 6.46 13.5 80.0 9.26 0.858
## # ℹ 13 more rows
Shapiro Cu
# Subset for each land use
farm_cu <- soil_data_filtered$cu[soil_data_filtered$Land_Use == "Farm"]
forest_cu <- soil_data_filtered$cu[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm <- shapiro.test(farm_cu)
shapiro_forest <- shapiro.test(forest_cu)
print(shapiro_farm)
##
## Shapiro-Wilk normality test
##
## data: farm_cu
## W = 0.91234, p-value = 0.07063
print(shapiro_forest)
##
## Shapiro-Wilk normality test
##
## data: forest_cu
## W = 0.99775, p-value = 0.9094
Soil pH Shapiro
# Subset for each land use
farm_pH <- soil_data_filtered$pH[soil_data_filtered$Land_Use == "Farm"]
forest_pH <- soil_data_filtered$pH[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm_pH <- shapiro.test(farm_pH)
shapiro_forest_pH <- shapiro.test(forest_pH)
print(shapiro_farm_pH)
##
## Shapiro-Wilk normality test
##
## data: farm_pH
## W = 0.91519, p-value = 0.08009
print(shapiro_forest_pH)
##
## Shapiro-Wilk normality test
##
## data: forest_pH
## W = 0.86694, p-value = 0.2869
OM shapiro
# Subset for each land use
farm_som <- soil_data_filtered$som[soil_data_filtered$Land_Use == "Farm"]
forest_som <- soil_data_filtered$som[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm_som <- shapiro.test(farm_som)
shapiro_forest_som <- shapiro.test(forest_som)
print(shapiro_farm_som)
##
## Shapiro-Wilk normality test
##
## data: farm_som
## W = 0.91961, p-value = 0.09737
print(shapiro_forest_som)
##
## Shapiro-Wilk normality test
##
## data: forest_som
## W = 0.95353, p-value = 0.5851
clay shapiro
# Subset for each land use
farm_clay <- soil_data_filtered$clay[soil_data_filtered$Land_Use == "Farm"]
forest_clay <- soil_data_filtered$clay[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm_clay <- shapiro.test(farm_clay)
shapiro_forest_clay <- shapiro.test(forest_clay)
print(shapiro_farm_clay)
##
## Shapiro-Wilk normality test
##
## data: farm_clay
## W = 0.85156, p-value = 0.005661
print(shapiro_forest_clay)
##
## Shapiro-Wilk normality test
##
## data: forest_clay
## W = 0.88194, p-value = 0.3301
silt shapiro
# Subset for each land use
farm_silt <- soil_data_filtered$silt[soil_data_filtered$Land_Use == "Farm"]
forest_silt <- soil_data_filtered$silt[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm_silt <- shapiro.test(farm_silt)
shapiro_forest_silt <- shapiro.test(forest_silt)
print(shapiro_farm_silt)
##
## Shapiro-Wilk normality test
##
## data: farm_silt
## W = 0.95349, p-value = 0.4232
print(shapiro_forest_silt)
##
## Shapiro-Wilk normality test
##
## data: forest_silt
## W = 0.89286, p-value = 0.3631
sand shapiro
# Subset for each land use
farm_sand <- soil_data_filtered$sand[soil_data_filtered$Land_Use == "Farm"]
forest_sand <- soil_data_filtered$sand[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm_sand <- shapiro.test(farm_sand)
shapiro_forest_sand <- shapiro.test(forest_sand)
print(shapiro_farm_sand)
##
## Shapiro-Wilk normality test
##
## data: farm_sand
## W = 0.93608, p-value = 0.202
print(shapiro_forest_sand)
##
## Shapiro-Wilk normality test
##
## data: forest_sand
## W = 0.78427, p-value = 0.07746
20-40
# Filter for depth 20-40 cm
soil_data_filtered2 <- soil_data %>% filter(Depth == "20-40")
print(soil_data_filtered2)
## # A tibble: 23 × 10
## Code Land_Use Replication Depth pH clay silt sand som cu
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A2 Farm 1 20-40 5.06 3.48 17 79.5 7.26 0.241
## 2 B2 Farm 2 20-40 5.19 4.24 1 94.8 4.6 0.157
## 3 C2 Farm 3 20-40 4.46 9.47 19 71.5 7.94 0.278
## 4 D2 Farm 4 20-40 4.52 6.73 13.5 79.8 7.67 0.236
## 5 E2 Farm 5 20-40 4.41 4.99 7.25 87.8 7.33 0.207
## 6 F2 Farm 6 20-40 4.55 3.49 7.25 89.3 3.24 0.216
## 7 G2 Farm 7 20-40 4.68 2 5 93 6.39 0.128
## 8 H2 Farm 8 20-40 4.74 2.24 6.5 91.3 4.56 0.113
## 9 I2 Farm 9 20-40 4.73 2.23 8 89.8 8.20 0.173
## 10 J2 Farm 10 20-40 4.79 5.2 12.8 82.0 7.22 0.284
## # ℹ 13 more rows
Cu
# Subset for each land use
farm_cu <- soil_data_filtered2$cu[soil_data_filtered$Land_Use == "Farm"]
forest_cu <- soil_data_filtered2$cu[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm2 <- shapiro.test(farm_cu)
shapiro_forest2 <- shapiro.test(forest_cu)
print(shapiro_farm2)
##
## Shapiro-Wilk normality test
##
## data: farm_cu
## W = 0.6107, p-value = 3.809e-06
print(shapiro_forest2)
##
## Shapiro-Wilk normality test
##
## data: forest_cu
## W = 1, p-value = 0.996
pH
# Subset for each land use
farm_pH2 <- soil_data_filtered2$pH[soil_data_filtered$Land_Use == "Farm"]
forest_pH2 <- soil_data_filtered2$pH[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm_pH2 <- shapiro.test(farm_pH2)
shapiro_forest_pH2 <- shapiro.test(forest_pH2)
print(shapiro_farm_pH2)
##
## Shapiro-Wilk normality test
##
## data: farm_pH2
## W = 0.94376, p-value = 0.2821
print(shapiro_forest_pH2)
##
## Shapiro-Wilk normality test
##
## data: forest_pH2
## W = 0.94612, p-value = 0.5526
som
# Subset for each land use
farm_som2 <- soil_data_filtered2$som[soil_data_filtered$Land_Use == "Farm"]
forest_som2 <- soil_data_filtered2$som[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm_som2 <- shapiro.test(farm_som2)
shapiro_forest_som2 <- shapiro.test(forest_som2)
print(shapiro_farm_som2)
##
## Shapiro-Wilk normality test
##
## data: farm_som2
## W = 0.97001, p-value = 0.7551
print(shapiro_forest_som2)
##
## Shapiro-Wilk normality test
##
## data: forest_som2
## W = 0.94518, p-value = 0.5486
clay
# Subset for each land use
farm_clay2 <- soil_data_filtered2$clay[soil_data_filtered$Land_Use == "Farm"]
forest_clay2 <- soil_data_filtered2$clay[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm_clay2 <- shapiro.test(farm_clay2)
shapiro_forest_clay2 <- shapiro.test(forest_clay2)
print(shapiro_farm_clay2)
##
## Shapiro-Wilk normality test
##
## data: farm_clay2
## W = 0.93607, p-value = 0.2019
print(shapiro_forest_clay2)
##
## Shapiro-Wilk normality test
##
## data: forest_clay2
## W = 0.91873, p-value = 0.4479
silt
# Subset for each land use
farm_silt2 <- soil_data_filtered2$silt[soil_data_filtered$Land_Use == "Farm"]
forest_silt2 <- soil_data_filtered2$silt[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm_silt2 <- shapiro.test(farm_silt2)
shapiro_forest_silt2 <- shapiro.test(forest_silt2)
print(shapiro_farm_silt2)
##
## Shapiro-Wilk normality test
##
## data: farm_silt2
## W = 0.9825, p-value = 0.9621
print(shapiro_forest_silt2)
##
## Shapiro-Wilk normality test
##
## data: forest_silt2
## W = 0.98684, p-value = 0.7804
sand
# Subset for each land use
farm_sand2 <- soil_data_filtered2$sand[soil_data_filtered$Land_Use == "Farm"]
forest_sand2 <- soil_data_filtered2$sand[soil_data_filtered$Land_Use == "Forest"]
# Perform Shapiro-Wilk test on each group
shapiro_farm_sand2 <- shapiro.test(farm_sand2)
shapiro_forest_sand2 <- shapiro.test(forest_sand2)
print(shapiro_farm_sand2)
##
## Shapiro-Wilk normality test
##
## data: farm_sand2
## W = 0.96245, p-value = 0.5939
print(shapiro_forest_sand2)
##
## Shapiro-Wilk normality test
##
## data: forest_sand2
## W = 0.96402, p-value = 0.6355
equality of variance test 0-20
cu
var.test(farm_cu, forest_cu)
##
## F test to compare two variances
##
## data: farm_cu and forest_cu
## F = 0.84651, num df = 19, denom df = 2, p-value = 0.6569
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.02146034 3.81566371
## sample estimates:
## ratio of variances
## 0.8465092
pH
var.test(farm_pH, forest_pH)
##
## F test to compare two variances
##
## data: farm_pH and forest_pH
## F = 0.78042, num df = 19, denom df = 2, p-value = 0.6012
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.01978495 3.51777783
## sample estimates:
## ratio of variances
## 0.7804228
som
var.test(farm_som, forest_som)
##
## F test to compare two variances
##
## data: farm_som and forest_som
## F = 15.242, num df = 19, denom df = 2, p-value = 0.1266
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3864193 68.7056319
## sample estimates:
## ratio of variances
## 15.24242
clay
levene_result <- leveneTest(clay ~ Land_Use, data = soil_data_filtered)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
print(levene_result)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0695 0.7946
## 21
silt
var.test(farm_silt, forest_silt)
##
## F test to compare two variances
##
## data: farm_silt and forest_silt
## F = 106.43, num df = 19, denom df = 2, p-value = 0.0187
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 2.698046 479.714514
## sample estimates:
## ratio of variances
## 106.4252
var.test(farm_sand, forest_sand)
##
## F test to compare two variances
##
## data: farm_sand and forest_sand
## F = 8.8179, num df = 19, denom df = 2, p-value = 0.2132
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.2235479 39.7469785
## sample estimates:
## ratio of variances
## 8.817911
20-40
cu
levene_result2 <- leveneTest(cu ~ Land_Use, data = soil_data_filtered2)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
print(levene_result2)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.1632 0.6903
## 21
pH, som, clay, silt, sand
var.test(farm_pH2, forest_pH2)
##
## F test to compare two variances
##
## data: farm_pH2 and forest_pH2
## F = 0.38717, num df = 19, denom df = 2, p-value = 0.2036
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.009815367 1.745179318
## sample estimates:
## ratio of variances
## 0.3871699
var.test(farm_som2, forest_som2)
##
## F test to compare two variances
##
## data: farm_som2 and forest_som2
## F = 2.9108, num df = 19, denom df = 2, p-value = 0.5729
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.07379229 13.12032115
## sample estimates:
## ratio of variances
## 2.910758
var.test(farm_clay2, forest_clay2)
##
## F test to compare two variances
##
## data: farm_clay2 and forest_clay2
## F = 12.872, num df = 19, denom df = 2, p-value = 0.1489
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3263245 58.0207308
## sample estimates:
## ratio of variances
## 12.87196
var.test(farm_silt2, forest_silt2)
##
## F test to compare two variances
##
## data: farm_silt2 and forest_silt2
## F = 2.1061, num df = 19, denom df = 2, p-value = 0.7416
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.0533921 9.4931536
## sample estimates:
## ratio of variances
## 2.106066
var.test(farm_sand2, forest_sand2)
##
## F test to compare two variances
##
## data: farm_sand2 and forest_sand2
## F = 4.8481, num df = 19, denom df = 2, p-value = 0.3692
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1229068 21.8529107
## sample estimates:
## ratio of variances
## 4.848092
T test
Cu
t.test(cu ~ Land_Use, data = soil_data_filtered, var.equal = TRUE)
##
## Two Sample t-test
##
## data: cu by Land_Use
## t = 0.77574, df = 21, p-value = 0.4466
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
## -0.2195144 0.4807144
## sample estimates:
## mean in group Farm mean in group Forest
## 0.4666 0.3360
wilcox.test(cu ~ Land_Use, data = soil_data_filtered2)
##
## Wilcoxon rank sum exact test
##
## data: cu by Land_Use
## W = 8, p-value = 0.0463
## alternative hypothesis: true location shift is not equal to 0
pH
t.test(pH ~ Land_Use, data = soil_data_filtered, var.equal = TRUE)
##
## Two Sample t-test
##
## data: pH by Land_Use
## t = -4.9198, df = 21, p-value = 7.245e-05
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
## -1.1665943 -0.4733724
## sample estimates:
## mean in group Farm mean in group Forest
## 4.513350 5.333333
t.test(pH ~ Land_Use, data = soil_data_filtered2, var.equal = TRUE)
##
## Two Sample t-test
##
## data: pH by Land_Use
## t = -4.9776, df = 21, p-value = 6.319e-05
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
## -1.1244036 -0.4617297
## sample estimates:
## mean in group Farm mean in group Forest
## 4.643600 5.436667
som
t.test(som ~ Land_Use, data = soil_data_filtered, var.equal = TRUE)
##
## Two Sample t-test
##
## data: som by Land_Use
## t = -2.5966, df = 21, p-value = 0.01684
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
## -3.7565260 -0.4153407
## sample estimates:
## mean in group Farm mean in group Forest
## 8.18740 10.27333
t.test(som ~ Land_Use, data = soil_data_filtered2, var.equal = TRUE)
##
## Two Sample t-test
##
## data: som by Land_Use
## t = -2.3466, df = 21, p-value = 0.02884
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
## -4.0743190 -0.2457477
## sample estimates:
## mean in group Farm mean in group Forest
## 6.543300 8.703333
clay
wilcox.test(clay ~ Land_Use, data = soil_data_filtered)
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: clay by Land_Use
## W = 17.5, p-value = 0.2727
## alternative hypothesis: true location shift is not equal to 0
t.test(clay ~ Land_Use, data = soil_data_filtered2, var.equal = TRUE)
##
## Two Sample t-test
##
## data: clay by Land_Use
## t = -1.3148, df = 21, p-value = 0.2028
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
## -3.7189557 0.8379557
## sample estimates:
## mean in group Farm mean in group Forest
## 4.3795 5.8200
silt
t.test(silt ~ Land_Use, data = soil_data_filtered, var.equal = TRUE)
##
## Two Sample t-test
##
## data: silt by Land_Use
## t = 0.60005, df = 21, p-value = 0.5549
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
## -5.948612 10.773612
## sample estimates:
## mean in group Farm mean in group Forest
## 12.1625 9.7500
t.test(silt ~ Land_Use, data = soil_data_filtered2, var.equal = TRUE)
##
## Two Sample t-test
##
## data: silt by Land_Use
## t = 0.2193, df = 21, p-value = 0.8285
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
## -5.124995 6.333328
## sample estimates:
## mean in group Farm mean in group Forest
## 9.937500 9.333333
sand
t.test(sand ~ Land_Use, data = soil_data_filtered, var.equal = TRUE)
##
## Two Sample t-test
##
## data: sand by Land_Use
## t = -0.09647, df = 21, p-value = 0.9241
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
## -11.80108 10.75475
## sample estimates:
## mean in group Farm mean in group Forest
## 80.83350 81.35667
t.test(sand ~ Land_Use, data = soil_data_filtered2, var.equal = TRUE)
##
## Two Sample t-test
##
## data: sand by Land_Use
## t = 0.23821, df = 21, p-value = 0.814
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
## -6.464961 8.137627
## sample estimates:
## mean in group Farm mean in group Forest
## 85.68300 84.84667
Figures
pH
df_summarypH <- soil_data %>%
dplyr::group_by(Land_Use, Depth) %>%
dplyr::summarise(
mean_pH = mean(pH, na.rm = TRUE),
se_pH = sd(pH, na.rm = TRUE) / sqrt(n())
)
## `summarise()` has grouped output by 'Land_Use'. You can override using the
## `.groups` argument.
print(df_summarypH)
## # A tibble: 4 × 4
## # Groups: Land_Use [2]
## Land_Use Depth mean_pH se_pH
## <chr> <chr> <dbl> <dbl>
## 1 Farm 0-20 4.51 0.0594
## 2 Farm 20-40 4.64 0.0536
## 3 Forest 0-20 5.33 0.174
## 4 Forest 20-40 5.44 0.223
p <- ggplot(df_summarypH, aes(x = Depth, y = mean_pH, fill = Land_Use)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.7) +
geom_errorbar(aes(ymin = mean_pH - se_pH, ymax = mean_pH + se_pH),
width = 0.2, position = position_dodge(width = 0.7)) +
labs(x = "Soil Depth (cm)", y = "Mean pH Level") +
theme_classic(base_family = "Times New Roman") +
lims(y = c(0, 12)) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank()
) +
scale_fill_brewer(
palette = "Dark2",
name = "Land Use",
labels = c("Vegetable Farms", "Forests")
)
print(p)

# Save the plot as a high-resolution PNG
ggsave("mean_som_plot_highres.jpeg", plot = p, width = 8, height = 6, dpi = 2000)
som
df_summarysom <- soil_data %>%
dplyr::group_by(Land_Use, Depth) %>%
dplyr::summarise(
mean_som = mean(som, na.rm = TRUE),
se_som = sd(som, na.rm = TRUE) / sqrt(n())
)
## `summarise()` has grouped output by 'Land_Use'. You can override using the
## `.groups` argument.
print(df_summarypH)
## # A tibble: 4 × 4
## # Groups: Land_Use [2]
## Land_Use Depth mean_pH se_pH
## <chr> <chr> <dbl> <dbl>
## 1 Farm 0-20 4.51 0.0594
## 2 Farm 20-40 4.64 0.0536
## 3 Forest 0-20 5.33 0.174
## 4 Forest 20-40 5.44 0.223
p <- ggplot(df_summarysom, aes(x = Depth, y = mean_som, fill = Land_Use)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.7) +
geom_errorbar(aes(ymin = mean_som - se_som, ymax = mean_som + se_som),
width = 0.2, position = position_dodge(width = 0.7)) +
labs(x = "Soil Depth (cm)", y = "Mean % SOM Level") +
theme_classic(base_family = "Times New Roman") +
lims(y = c(0, 12)) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank()
) +
scale_fill_brewer(
palette = "Dark2",
name = "Land Use",
labels = c("Vegetable Farms", "Forests")
)
print(p)

# Save the plot as a high-resolution PNG
ggsave("mean_som_plot_highres.jpeg", plot = p, width = 8, height = 6, dpi = 2000)
cu
df_summarycu <- soil_data %>%
dplyr::group_by(Land_Use, Depth) %>%
dplyr::summarise(
mean_cu = mean(cu, na.rm = TRUE),
se_cu = sd(cu, na.rm = TRUE) / sqrt(n())
)
## `summarise()` has grouped output by 'Land_Use'. You can override using the
## `.groups` argument.
print(df_summarycu)
## # A tibble: 4 × 4
## # Groups: Land_Use [2]
## Land_Use Depth mean_cu se_cu
## <chr> <chr> <dbl> <dbl>
## 1 Farm 0-20 0.467 0.0637
## 2 Farm 20-40 0.452 0.115
## 3 Forest 0-20 0.336 0.0422
## 4 Forest 20-40 1.04 0.322
cu_tolerable <- 70
ggplot(df_summarycu, aes(x = Depth, y = mean_cu, fill = Land_Use)) +
geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.7) +
geom_errorbar(aes(ymin = mean_cu - se_cu, ymax = mean_cu + se_cu),
width = 0.2, position = position_dodge(width = 0.7)) +
geom_hline(yintercept = cu_tolerable, linetype = "dashed", color = "red", size = 1) +
scale_y_continuous(breaks = seq(0, max(df_summarycu$mean_cu + df_summarycu$se_cu, na.rm = TRUE) + 10, by = 10)) +
annotate("text", x = 1.5, y = cu_tolerable + 4, label = "Tolerable Cu limit (USA EPA)", color = "red", size = 3) +
labs(x = "Soil Depth (cm)", y = "Mean Extractable Cu (ppm) Level") +
theme_classic(base_family = "Times New Roman") +
lims(y = c(0, 80)) +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank()
) +
scale_fill_brewer(
palette = "Dark2",
name = "Land Use",
labels = c("Vegetable Farms", "Forests")
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

# Save the plot as a high-resolution PNG
ggsave("mean_cu_plot_highres.jpeg", plot = p, width = 8, height = 6, dpi = 2500)
df_summary <- soil_data %>%
dplyr::group_by(Land_Use, Depth) %>%
dplyr::summarise(
mean_pH = mean(pH, na.rm = TRUE),
se_pH = sd(pH, na.rm = TRUE) / sqrt(n()),
mean_clay = mean(clay, na.rm = TRUE),
se_clay = sd(clay, na.rm = TRUE) / sqrt(n()),
mean_sand = mean(sand, na.rm = TRUE),
se_sand = sd(sand, na.rm = TRUE) / sqrt(n()),
mean_silt = mean(silt, na.rm = TRUE),
se_silt = sd(silt, na.rm = TRUE) / sqrt(n())
)
## `summarise()` has grouped output by 'Land_Use'. You can override using the
## `.groups` argument.
print(df_summary)
## # A tibble: 4 × 10
## # Groups: Land_Use [2]
## Land_Use Depth mean_pH se_pH mean_clay se_clay mean_sand se_sand mean_silt
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Farm 0-20 4.51 0.0594 7.00 0.651 80.8 2.05 12.2
## 2 Farm 20-40 4.64 0.0536 4.38 0.414 85.7 1.32 9.94
## 3 Forest 0-20 5.33 0.174 8.89 1.68 81.4 1.78 9.75
## 4 Forest 20-40 5.44 0.223 5.82 0.298 84.8 1.55 9.33
## # ℹ 1 more variable: se_silt <dbl>
library(tidyr)
soil separates
# Step 1: Rename mean and SE columns
df_summary_renamed <- df_summary %>%
rename(
Clay = mean_clay,
Silt = mean_silt,
Sand = mean_sand,
se_Clay = se_clay,
se_Silt = se_silt,
se_Sand = se_sand
)
# Step 2: Pivot means and SEs to long format
df_mean_long <- df_summary_renamed %>%
pivot_longer(cols = c(Clay, Silt, Sand),
names_to = "Soil_Component",
values_to = "Mean_Percent")
df_se_long <- df_summary_renamed %>%
pivot_longer(cols = c(se_Clay, se_Silt, se_Sand),
names_to = "Soil_Component",
values_to = "SE") %>%
mutate(Soil_Component = sub("se_", "", Soil_Component)) # Clean names
# Step 3: Combine mean and SE data
df_long_combined <- left_join(df_mean_long, df_se_long, by = c("Land_Use", "Depth", "Soil_Component"))
# Step 4: Plot with error bars
ggplot(df_long_combined, aes(x = Depth, y = Mean_Percent, color = Soil_Component, group = Soil_Component)) +
geom_line() +
geom_point() +
geom_errorbar(aes(ymin = Mean_Percent - SE, ymax = Mean_Percent + SE), width = 0.2) +
labs(x = "Soil Depth (cm)", y = "Mean Percent (%)") +
theme_classic(base_family = "Times New Roman") +
scale_color_brewer(palette = "Set2", name = "Soil Separates") + # More visible palette
facet_wrap(~ Land_Use, scales = "free_y") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank())

ggsave("thres.png", plot = p, width = 8, height = 6, dpi = 1200)
correlations
soil_data <- read_excel("RawdataF.xlsx", sheet = "SOIL_DATA")
head(soil_data)
## # A tibble: 6 × 10
## Code Land_Use Replication Depth pH clay silt sand som cu
## <chr> <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A1 Farm 1 0-20 5.05 5.74 9.75 84.5 9.30 0.762
## 2 B1 Farm 2 0-20 5.08 4.24 1 94.8 6.40 0.179
## 3 C1 Farm 3 0-20 4.29 14.7 29.8 55.5 10.1 0.841
## 4 D1 Farm 4 0-20 4.54 13.2 14.8 72.0 8.93 0.646
## 5 E1 Farm 5 0-20 4.34 7.47 12.8 79.8 9.17 0.411
## 6 F1 Farm 6 0-20 4.31 4.98 12.2 82.8 5.50 0.365
0-20 farm
soil_data |>
filter(Depth == "0-20", Land_Use == "Farm") |>
select(-c(Land_Use, Code, Depth, Replication)) |>
correlation::correlation(include_factors = TRUE, method = "pearson")
## # Correlation Matrix (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(18) | p
## ---------------------------------------------------------------------
## pH | clay | -0.25 | [-0.62, 0.22] | -1.08 | 0.897
## pH | silt | -0.33 | [-0.68, 0.13] | -1.51 | 0.897
## pH | sand | 0.33 | [-0.13, 0.67] | 1.47 | 0.897
## pH | som | -0.13 | [-0.54, 0.33] | -0.55 | 0.897
## pH | cu | 0.27 | [-0.20, 0.64] | 1.19 | 0.897
## clay | silt | 0.72 | [ 0.41, 0.88] | 4.45 | 0.004**
## clay | sand | -0.86 | [-0.94, -0.67] | -7.07 | < .001***
## clay | som | 0.46 | [ 0.02, 0.75] | 2.19 | 0.294
## clay | cu | 0.31 | [-0.15, 0.66] | 1.40 | 0.897
## silt | sand | -0.98 | [-0.99, -0.94] | -18.84 | < .001***
## silt | som | 0.58 | [ 0.19, 0.82] | 3.06 | 0.081
## silt | cu | 0.56 | [ 0.16, 0.81] | 2.89 | 0.097
## sand | som | -0.58 | [-0.81, -0.19] | -3.04 | 0.081
## sand | cu | -0.52 | [-0.78, -0.10] | -2.58 | 0.151
## som | cu | 0.56 | [ 0.16, 0.80] | 2.87 | 0.097
##
## p-value adjustment method: Holm (1979)
## Observations: 20
20-40 farm
soil_data |>
filter(Depth == "20-40", Land_Use == "Farm") |>
select(-c(Land_Use, Code, Depth, Replication)) |>
correlation::correlation(include_factors = TRUE, method = "spearman")
## # Correlation Matrix (spearman-method)
##
## Parameter1 | Parameter2 | rho | 95% CI | S | p
## ----------------------------------------------------------------------
## pH | clay | -0.29 | [-0.66, 0.19] | 1720.00 | 0.838
## pH | silt | -0.10 | [-0.53, 0.37] | 1459.15 | > .999
## pH | sand | 0.16 | [-0.32, 0.57] | 1122.92 | > .999
## pH | som | -0.48 | [-0.77, -0.03] | 1967.24 | 0.358
## pH | cu | -0.41 | [-0.73, 0.06] | 1870.00 | 0.681
## clay | silt | 0.58 | [ 0.16, 0.82] | 565.14 | 0.104
## clay | sand | -0.66 | [-0.86, -0.30] | 2210.33 | 0.021*
## clay | som | 0.33 | [-0.15, 0.68] | 892.84 | 0.804
## clay | cu | 0.40 | [-0.06, 0.73] | 792.00 | 0.681
## silt | sand | -0.98 | [-0.99, -0.96] | 2639.97 | < .001***
## silt | som | 0.37 | [-0.10, 0.71] | 832.75 | 0.731
## silt | cu | 0.47 | [ 0.02, 0.76] | 708.30 | 0.377
## sand | som | -0.35 | [-0.69, 0.13] | 1791.35 | 0.804
## sand | cu | -0.49 | [-0.77, -0.05] | 1980.24 | 0.344
## som | cu | 0.14 | [-0.33, 0.56] | 1138.93 | > .999
##
## p-value adjustment method: Holm (1979)
## Observations: 20
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
Figures
correlation 0-20
ggpairs(
soil_data |>
filter(Depth == "0-20", Land_Use == "Farm") |>
select(-c(Land_Use, Code, Depth, Replication)),
upper = list(continuous = wrap("cor", method = "pearson"))
)

ggsave("mean_cor_plot_highres.png", plot = p, width = 8, height = 6, dpi = 600)
correlation 20-40
ggpairs(
soil_data |>
filter(Depth == "20-40", Land_Use == "Farm") |>
select(-c(Land_Use, Code, Depth, Replication)),
upper = list(continuous = wrap("cor", method = "spearman"))
)
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties

ggsave("mean_cor_plot_highres.png", plot = p, width = 8, height = 6, dpi = 600)