library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(ggstatsplot)
## You can cite this package as:
##      Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
##      Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(extrafont)
## Registering fonts with R

Noramlity Test

Read file

soil_data <- read_excel("RawdataF.xlsx", sheet = "SOIL_DATA")
print(soil_data)
## # A tibble: 46 × 10
##    Code  Land_Use Replication Depth    pH  clay  silt  sand   som    cu
##    <chr> <chr>          <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 A1    Farm               1 0-20   5.05  5.74  9.75  84.5  9.30 0.762
##  2 B1    Farm               2 0-20   5.08  4.24  1     94.8  6.40 0.179
##  3 C1    Farm               3 0-20   4.29 14.7  29.8   55.5 10.1  0.841
##  4 D1    Farm               4 0-20   4.54 13.2  14.8   72.0  8.93 0.646
##  5 E1    Farm               5 0-20   4.34  7.47 12.8   79.8  9.17 0.411
##  6 F1    Farm               6 0-20   4.31  4.98 12.2   82.8  5.50 0.365
##  7 G1    Farm               7 0-20   4.39  5.73  6.75  87.5  8.88 0.387
##  8 H1    Farm               8 0-20   4.47  3.98  1.5   94.5  6.45 0.256
##  9 HI1   Farm               9 0-20   4.61  8.98 17     74.0  7.97 0.539
## 10 J1    Farm              10 0-20   4.84  6.46 13.5   80.0  9.26 0.858
## # ℹ 36 more rows

Filter 0-20

# Filter for depth 0–20 cm
soil_data_filtered <- soil_data %>% filter(Depth == "0-20")
print(soil_data_filtered)
## # A tibble: 23 × 10
##    Code  Land_Use Replication Depth    pH  clay  silt  sand   som    cu
##    <chr> <chr>          <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 A1    Farm               1 0-20   5.05  5.74  9.75  84.5  9.30 0.762
##  2 B1    Farm               2 0-20   5.08  4.24  1     94.8  6.40 0.179
##  3 C1    Farm               3 0-20   4.29 14.7  29.8   55.5 10.1  0.841
##  4 D1    Farm               4 0-20   4.54 13.2  14.8   72.0  8.93 0.646
##  5 E1    Farm               5 0-20   4.34  7.47 12.8   79.8  9.17 0.411
##  6 F1    Farm               6 0-20   4.31  4.98 12.2   82.8  5.50 0.365
##  7 G1    Farm               7 0-20   4.39  5.73  6.75  87.5  8.88 0.387
##  8 H1    Farm               8 0-20   4.47  3.98  1.5   94.5  6.45 0.256
##  9 HI1   Farm               9 0-20   4.61  8.98 17     74.0  7.97 0.539
## 10 J1    Farm              10 0-20   4.84  6.46 13.5   80.0  9.26 0.858
## # ℹ 13 more rows

Shapiro Cu

# Subset for each land use
farm_cu <- soil_data_filtered$cu[soil_data_filtered$Land_Use == "Farm"]
forest_cu <- soil_data_filtered$cu[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm <- shapiro.test(farm_cu)
shapiro_forest <- shapiro.test(forest_cu)

print(shapiro_farm)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_cu
## W = 0.91234, p-value = 0.07063
print(shapiro_forest)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_cu
## W = 0.99775, p-value = 0.9094

Soil pH Shapiro

# Subset for each land use
farm_pH <- soil_data_filtered$pH[soil_data_filtered$Land_Use == "Farm"]
forest_pH <- soil_data_filtered$pH[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm_pH <- shapiro.test(farm_pH)
shapiro_forest_pH <- shapiro.test(forest_pH)

print(shapiro_farm_pH)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_pH
## W = 0.91519, p-value = 0.08009
print(shapiro_forest_pH)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_pH
## W = 0.86694, p-value = 0.2869

OM shapiro

# Subset for each land use
farm_som <- soil_data_filtered$som[soil_data_filtered$Land_Use == "Farm"]
forest_som <- soil_data_filtered$som[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm_som <- shapiro.test(farm_som)
shapiro_forest_som <- shapiro.test(forest_som)

print(shapiro_farm_som)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_som
## W = 0.91961, p-value = 0.09737
print(shapiro_forest_som)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_som
## W = 0.95353, p-value = 0.5851

clay shapiro

# Subset for each land use
farm_clay <- soil_data_filtered$clay[soil_data_filtered$Land_Use == "Farm"]
forest_clay <- soil_data_filtered$clay[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm_clay <- shapiro.test(farm_clay)
shapiro_forest_clay <- shapiro.test(forest_clay)

print(shapiro_farm_clay)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_clay
## W = 0.85156, p-value = 0.005661
print(shapiro_forest_clay)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_clay
## W = 0.88194, p-value = 0.3301

silt shapiro

# Subset for each land use
farm_silt <- soil_data_filtered$silt[soil_data_filtered$Land_Use == "Farm"]
forest_silt <- soil_data_filtered$silt[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm_silt <- shapiro.test(farm_silt)
shapiro_forest_silt <- shapiro.test(forest_silt)

print(shapiro_farm_silt)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_silt
## W = 0.95349, p-value = 0.4232
print(shapiro_forest_silt)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_silt
## W = 0.89286, p-value = 0.3631

sand shapiro

# Subset for each land use
farm_sand <- soil_data_filtered$sand[soil_data_filtered$Land_Use == "Farm"]
forest_sand <- soil_data_filtered$sand[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm_sand <- shapiro.test(farm_sand)
shapiro_forest_sand <- shapiro.test(forest_sand)

print(shapiro_farm_sand)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_sand
## W = 0.93608, p-value = 0.202
print(shapiro_forest_sand)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_sand
## W = 0.78427, p-value = 0.07746

20-40

# Filter for depth 20-40 cm
soil_data_filtered2 <- soil_data %>% filter(Depth == "20-40")
print(soil_data_filtered2)
## # A tibble: 23 × 10
##    Code  Land_Use Replication Depth    pH  clay  silt  sand   som    cu
##    <chr> <chr>          <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 A2    Farm               1 20-40  5.06  3.48 17     79.5  7.26 0.241
##  2 B2    Farm               2 20-40  5.19  4.24  1     94.8  4.6  0.157
##  3 C2    Farm               3 20-40  4.46  9.47 19     71.5  7.94 0.278
##  4 D2    Farm               4 20-40  4.52  6.73 13.5   79.8  7.67 0.236
##  5 E2    Farm               5 20-40  4.41  4.99  7.25  87.8  7.33 0.207
##  6 F2    Farm               6 20-40  4.55  3.49  7.25  89.3  3.24 0.216
##  7 G2    Farm               7 20-40  4.68  2     5     93    6.39 0.128
##  8 H2    Farm               8 20-40  4.74  2.24  6.5   91.3  4.56 0.113
##  9 I2    Farm               9 20-40  4.73  2.23  8     89.8  8.20 0.173
## 10 J2    Farm              10 20-40  4.79  5.2  12.8   82.0  7.22 0.284
## # ℹ 13 more rows

Cu

# Subset for each land use
farm_cu <- soil_data_filtered2$cu[soil_data_filtered$Land_Use == "Farm"]
forest_cu <- soil_data_filtered2$cu[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm2 <- shapiro.test(farm_cu)
shapiro_forest2 <- shapiro.test(forest_cu)

print(shapiro_farm2)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_cu
## W = 0.6107, p-value = 3.809e-06
print(shapiro_forest2)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_cu
## W = 1, p-value = 0.996

pH

# Subset for each land use
farm_pH2 <- soil_data_filtered2$pH[soil_data_filtered$Land_Use == "Farm"]
forest_pH2 <- soil_data_filtered2$pH[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm_pH2 <- shapiro.test(farm_pH2)
shapiro_forest_pH2 <- shapiro.test(forest_pH2)

print(shapiro_farm_pH2)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_pH2
## W = 0.94376, p-value = 0.2821
print(shapiro_forest_pH2)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_pH2
## W = 0.94612, p-value = 0.5526

som

# Subset for each land use
farm_som2 <- soil_data_filtered2$som[soil_data_filtered$Land_Use == "Farm"]
forest_som2 <- soil_data_filtered2$som[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm_som2 <- shapiro.test(farm_som2)
shapiro_forest_som2 <- shapiro.test(forest_som2)

print(shapiro_farm_som2)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_som2
## W = 0.97001, p-value = 0.7551
print(shapiro_forest_som2)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_som2
## W = 0.94518, p-value = 0.5486

clay

# Subset for each land use
farm_clay2 <- soil_data_filtered2$clay[soil_data_filtered$Land_Use == "Farm"]
forest_clay2 <- soil_data_filtered2$clay[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm_clay2 <- shapiro.test(farm_clay2)
shapiro_forest_clay2 <- shapiro.test(forest_clay2)

print(shapiro_farm_clay2)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_clay2
## W = 0.93607, p-value = 0.2019
print(shapiro_forest_clay2)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_clay2
## W = 0.91873, p-value = 0.4479

silt

# Subset for each land use
farm_silt2 <- soil_data_filtered2$silt[soil_data_filtered$Land_Use == "Farm"]
forest_silt2 <- soil_data_filtered2$silt[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm_silt2 <- shapiro.test(farm_silt2)
shapiro_forest_silt2 <- shapiro.test(forest_silt2)

print(shapiro_farm_silt2)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_silt2
## W = 0.9825, p-value = 0.9621
print(shapiro_forest_silt2)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_silt2
## W = 0.98684, p-value = 0.7804

sand

# Subset for each land use
farm_sand2 <- soil_data_filtered2$sand[soil_data_filtered$Land_Use == "Farm"]
forest_sand2 <- soil_data_filtered2$sand[soil_data_filtered$Land_Use == "Forest"]

# Perform Shapiro-Wilk test on each group
shapiro_farm_sand2 <- shapiro.test(farm_sand2)
shapiro_forest_sand2 <- shapiro.test(forest_sand2)

print(shapiro_farm_sand2)
## 
##  Shapiro-Wilk normality test
## 
## data:  farm_sand2
## W = 0.96245, p-value = 0.5939
print(shapiro_forest_sand2)
## 
##  Shapiro-Wilk normality test
## 
## data:  forest_sand2
## W = 0.96402, p-value = 0.6355

equality of variance test 0-20

cu

var.test(farm_cu, forest_cu)
## 
##  F test to compare two variances
## 
## data:  farm_cu and forest_cu
## F = 0.84651, num df = 19, denom df = 2, p-value = 0.6569
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.02146034 3.81566371
## sample estimates:
## ratio of variances 
##          0.8465092

pH

var.test(farm_pH, forest_pH)
## 
##  F test to compare two variances
## 
## data:  farm_pH and forest_pH
## F = 0.78042, num df = 19, denom df = 2, p-value = 0.6012
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.01978495 3.51777783
## sample estimates:
## ratio of variances 
##          0.7804228

som

var.test(farm_som, forest_som)
## 
##  F test to compare two variances
## 
## data:  farm_som and forest_som
## F = 15.242, num df = 19, denom df = 2, p-value = 0.1266
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.3864193 68.7056319
## sample estimates:
## ratio of variances 
##           15.24242

clay

levene_result <- leveneTest(clay ~ Land_Use, data = soil_data_filtered)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
print(levene_result)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  0.0695 0.7946
##       21

silt

var.test(farm_silt, forest_silt)
## 
##  F test to compare two variances
## 
## data:  farm_silt and forest_silt
## F = 106.43, num df = 19, denom df = 2, p-value = 0.0187
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##    2.698046 479.714514
## sample estimates:
## ratio of variances 
##           106.4252
var.test(farm_sand, forest_sand)
## 
##  F test to compare two variances
## 
## data:  farm_sand and forest_sand
## F = 8.8179, num df = 19, denom df = 2, p-value = 0.2132
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.2235479 39.7469785
## sample estimates:
## ratio of variances 
##           8.817911

20-40

cu

levene_result2 <- leveneTest(cu ~ Land_Use, data = soil_data_filtered2)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
print(levene_result2)
## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  1  0.1632 0.6903
##       21

pH, som, clay, silt, sand

var.test(farm_pH2, forest_pH2)
## 
##  F test to compare two variances
## 
## data:  farm_pH2 and forest_pH2
## F = 0.38717, num df = 19, denom df = 2, p-value = 0.2036
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.009815367 1.745179318
## sample estimates:
## ratio of variances 
##          0.3871699
var.test(farm_som2, forest_som2)
## 
##  F test to compare two variances
## 
## data:  farm_som2 and forest_som2
## F = 2.9108, num df = 19, denom df = 2, p-value = 0.5729
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.07379229 13.12032115
## sample estimates:
## ratio of variances 
##           2.910758
var.test(farm_clay2, forest_clay2)
## 
##  F test to compare two variances
## 
## data:  farm_clay2 and forest_clay2
## F = 12.872, num df = 19, denom df = 2, p-value = 0.1489
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.3263245 58.0207308
## sample estimates:
## ratio of variances 
##           12.87196
var.test(farm_silt2, forest_silt2)
## 
##  F test to compare two variances
## 
## data:  farm_silt2 and forest_silt2
## F = 2.1061, num df = 19, denom df = 2, p-value = 0.7416
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.0533921 9.4931536
## sample estimates:
## ratio of variances 
##           2.106066
var.test(farm_sand2, forest_sand2)
## 
##  F test to compare two variances
## 
## data:  farm_sand2 and forest_sand2
## F = 4.8481, num df = 19, denom df = 2, p-value = 0.3692
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.1229068 21.8529107
## sample estimates:
## ratio of variances 
##           4.848092

T test

Cu

t.test(cu ~ Land_Use, data = soil_data_filtered, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  cu by Land_Use
## t = 0.77574, df = 21, p-value = 0.4466
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
##  -0.2195144  0.4807144
## sample estimates:
##   mean in group Farm mean in group Forest 
##               0.4666               0.3360
wilcox.test(cu ~ Land_Use, data = soil_data_filtered2)
## 
##  Wilcoxon rank sum exact test
## 
## data:  cu by Land_Use
## W = 8, p-value = 0.0463
## alternative hypothesis: true location shift is not equal to 0

pH

t.test(pH ~ Land_Use, data = soil_data_filtered, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  pH by Land_Use
## t = -4.9198, df = 21, p-value = 7.245e-05
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
##  -1.1665943 -0.4733724
## sample estimates:
##   mean in group Farm mean in group Forest 
##             4.513350             5.333333
t.test(pH ~ Land_Use, data = soil_data_filtered2, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  pH by Land_Use
## t = -4.9776, df = 21, p-value = 6.319e-05
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
##  -1.1244036 -0.4617297
## sample estimates:
##   mean in group Farm mean in group Forest 
##             4.643600             5.436667

som

t.test(som ~ Land_Use, data = soil_data_filtered, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  som by Land_Use
## t = -2.5966, df = 21, p-value = 0.01684
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
##  -3.7565260 -0.4153407
## sample estimates:
##   mean in group Farm mean in group Forest 
##              8.18740             10.27333
t.test(som ~ Land_Use, data = soil_data_filtered2, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  som by Land_Use
## t = -2.3466, df = 21, p-value = 0.02884
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
##  -4.0743190 -0.2457477
## sample estimates:
##   mean in group Farm mean in group Forest 
##             6.543300             8.703333

clay

wilcox.test(clay ~ Land_Use, data = soil_data_filtered)
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  clay by Land_Use
## W = 17.5, p-value = 0.2727
## alternative hypothesis: true location shift is not equal to 0
t.test(clay ~ Land_Use, data = soil_data_filtered2, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  clay by Land_Use
## t = -1.3148, df = 21, p-value = 0.2028
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
##  -3.7189557  0.8379557
## sample estimates:
##   mean in group Farm mean in group Forest 
##               4.3795               5.8200

silt

t.test(silt ~ Land_Use, data = soil_data_filtered, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  silt by Land_Use
## t = 0.60005, df = 21, p-value = 0.5549
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
##  -5.948612 10.773612
## sample estimates:
##   mean in group Farm mean in group Forest 
##              12.1625               9.7500
t.test(silt ~ Land_Use, data = soil_data_filtered2, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  silt by Land_Use
## t = 0.2193, df = 21, p-value = 0.8285
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
##  -5.124995  6.333328
## sample estimates:
##   mean in group Farm mean in group Forest 
##             9.937500             9.333333

sand

t.test(sand ~ Land_Use, data = soil_data_filtered, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  sand by Land_Use
## t = -0.09647, df = 21, p-value = 0.9241
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
##  -11.80108  10.75475
## sample estimates:
##   mean in group Farm mean in group Forest 
##             80.83350             81.35667
t.test(sand ~ Land_Use, data = soil_data_filtered2, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  sand by Land_Use
## t = 0.23821, df = 21, p-value = 0.814
## alternative hypothesis: true difference in means between group Farm and group Forest is not equal to 0
## 95 percent confidence interval:
##  -6.464961  8.137627
## sample estimates:
##   mean in group Farm mean in group Forest 
##             85.68300             84.84667

Figures

pH

df_summarypH <- soil_data %>%
  dplyr::group_by(Land_Use, Depth) %>%
  dplyr::summarise(
    mean_pH = mean(pH, na.rm = TRUE),
    se_pH = sd(pH, na.rm = TRUE) / sqrt(n())
  )
## `summarise()` has grouped output by 'Land_Use'. You can override using the
## `.groups` argument.
print(df_summarypH)
## # A tibble: 4 × 4
## # Groups:   Land_Use [2]
##   Land_Use Depth mean_pH  se_pH
##   <chr>    <chr>   <dbl>  <dbl>
## 1 Farm     0-20     4.51 0.0594
## 2 Farm     20-40    4.64 0.0536
## 3 Forest   0-20     5.33 0.174 
## 4 Forest   20-40    5.44 0.223
p <- ggplot(df_summarypH, aes(x = Depth, y = mean_pH, fill = Land_Use)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.7) +
  geom_errorbar(aes(ymin = mean_pH - se_pH, ymax = mean_pH + se_pH), 
                width = 0.2, position = position_dodge(width = 0.7)) +
  labs(x = "Soil Depth (cm)", y = "Mean pH Level") +
  theme_classic(base_family = "Times New Roman") +  
  lims(y = c(0, 12)) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  ) +
  scale_fill_brewer(
  palette = "Dark2",
  name = "Land Use",
  labels = c("Vegetable Farms", "Forests")
)
print(p)

# Save the plot as a high-resolution PNG
ggsave("mean_som_plot_highres.jpeg", plot = p, width = 8, height = 6, dpi = 2000)

som

df_summarysom <- soil_data %>%
  dplyr::group_by(Land_Use, Depth) %>%
  dplyr::summarise(
    mean_som = mean(som, na.rm = TRUE),
    se_som = sd(som, na.rm = TRUE) / sqrt(n())
  )
## `summarise()` has grouped output by 'Land_Use'. You can override using the
## `.groups` argument.
print(df_summarypH)
## # A tibble: 4 × 4
## # Groups:   Land_Use [2]
##   Land_Use Depth mean_pH  se_pH
##   <chr>    <chr>   <dbl>  <dbl>
## 1 Farm     0-20     4.51 0.0594
## 2 Farm     20-40    4.64 0.0536
## 3 Forest   0-20     5.33 0.174 
## 4 Forest   20-40    5.44 0.223
p <- ggplot(df_summarysom, aes(x = Depth, y = mean_som, fill = Land_Use)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.7) +
  geom_errorbar(aes(ymin = mean_som - se_som, ymax = mean_som + se_som), 
                width = 0.2, position = position_dodge(width = 0.7)) +
  labs(x = "Soil Depth (cm)", y = "Mean % SOM Level") +
  theme_classic(base_family = "Times New Roman") +  
  lims(y = c(0, 12)) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  ) +
  scale_fill_brewer(
  palette = "Dark2",
  name = "Land Use",
  labels = c("Vegetable Farms", "Forests")
)
print(p)

# Save the plot as a high-resolution PNG
ggsave("mean_som_plot_highres.jpeg", plot = p, width = 8, height = 6, dpi = 2000)

cu

df_summarycu <- soil_data %>%
  dplyr::group_by(Land_Use, Depth) %>%
  dplyr::summarise(
    mean_cu = mean(cu, na.rm = TRUE),
    se_cu = sd(cu, na.rm = TRUE) / sqrt(n())
  )
## `summarise()` has grouped output by 'Land_Use'. You can override using the
## `.groups` argument.
print(df_summarycu)
## # A tibble: 4 × 4
## # Groups:   Land_Use [2]
##   Land_Use Depth mean_cu  se_cu
##   <chr>    <chr>   <dbl>  <dbl>
## 1 Farm     0-20    0.467 0.0637
## 2 Farm     20-40   0.452 0.115 
## 3 Forest   0-20    0.336 0.0422
## 4 Forest   20-40   1.04  0.322
cu_tolerable <- 70
ggplot(df_summarycu, aes(x = Depth, y = mean_cu, fill = Land_Use)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.7), width = 0.7) +
  geom_errorbar(aes(ymin = mean_cu - se_cu, ymax = mean_cu + se_cu), 
                width = 0.2, position = position_dodge(width = 0.7)) +
  geom_hline(yintercept = cu_tolerable, linetype = "dashed", color = "red", size = 1) +
  scale_y_continuous(breaks = seq(0, max(df_summarycu$mean_cu + df_summarycu$se_cu, na.rm = TRUE) + 10, by = 10)) +
  annotate("text", x = 1.5, y = cu_tolerable + 4, label = "Tolerable Cu limit (USA EPA)", color = "red", size = 3) +
   labs(x = "Soil Depth (cm)", y = "Mean Extractable Cu (ppm) Level") +
  theme_classic(base_family = "Times New Roman") +  
  lims(y = c(0, 80)) +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  ) +
  scale_fill_brewer(
  palette = "Dark2",
  name = "Land Use",
  labels = c("Vegetable Farms", "Forests")
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

# Save the plot as a high-resolution PNG
ggsave("mean_cu_plot_highres.jpeg", plot = p, width = 8, height = 6, dpi = 2500)
df_summary <- soil_data %>%
  dplyr::group_by(Land_Use, Depth) %>%
  dplyr::summarise(
    mean_pH = mean(pH, na.rm = TRUE),
    se_pH = sd(pH, na.rm = TRUE) / sqrt(n()),
    mean_clay = mean(clay, na.rm = TRUE),
    se_clay = sd(clay, na.rm = TRUE) / sqrt(n()),
    mean_sand = mean(sand, na.rm = TRUE),
    se_sand = sd(sand, na.rm = TRUE) / sqrt(n()),
    mean_silt = mean(silt, na.rm = TRUE),
    se_silt = sd(silt, na.rm = TRUE) / sqrt(n())
  )
## `summarise()` has grouped output by 'Land_Use'. You can override using the
## `.groups` argument.
print(df_summary)
## # A tibble: 4 × 10
## # Groups:   Land_Use [2]
##   Land_Use Depth mean_pH  se_pH mean_clay se_clay mean_sand se_sand mean_silt
##   <chr>    <chr>   <dbl>  <dbl>     <dbl>   <dbl>     <dbl>   <dbl>     <dbl>
## 1 Farm     0-20     4.51 0.0594      7.00   0.651      80.8    2.05     12.2 
## 2 Farm     20-40    4.64 0.0536      4.38   0.414      85.7    1.32      9.94
## 3 Forest   0-20     5.33 0.174       8.89   1.68       81.4    1.78      9.75
## 4 Forest   20-40    5.44 0.223       5.82   0.298      84.8    1.55      9.33
## # ℹ 1 more variable: se_silt <dbl>
library(tidyr)

soil separates

# Step 1: Rename mean and SE columns
df_summary_renamed <- df_summary %>%
  rename(
    Clay = mean_clay,
    Silt = mean_silt,
    Sand = mean_sand,
    se_Clay = se_clay,
    se_Silt = se_silt,
    se_Sand = se_sand
  )

# Step 2: Pivot means and SEs to long format
df_mean_long <- df_summary_renamed %>%
  pivot_longer(cols = c(Clay, Silt, Sand),
               names_to = "Soil_Component",
               values_to = "Mean_Percent")

df_se_long <- df_summary_renamed %>%
  pivot_longer(cols = c(se_Clay, se_Silt, se_Sand),
               names_to = "Soil_Component",
               values_to = "SE") %>%
  mutate(Soil_Component = sub("se_", "", Soil_Component))  # Clean names

# Step 3: Combine mean and SE data
df_long_combined <- left_join(df_mean_long, df_se_long, by = c("Land_Use", "Depth", "Soil_Component"))

# Step 4: Plot with error bars
ggplot(df_long_combined, aes(x = Depth, y = Mean_Percent, color = Soil_Component, group = Soil_Component)) +
  geom_line() +
  geom_point() +
  geom_errorbar(aes(ymin = Mean_Percent - SE, ymax = Mean_Percent + SE), width = 0.2) +
  labs(x = "Soil Depth (cm)", y = "Mean Percent (%)") +
  theme_classic(base_family = "Times New Roman") +
  scale_color_brewer(palette = "Set2", name = "Soil Separates") +  # More visible palette
  facet_wrap(~ Land_Use, scales = "free_y") +
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        panel.background = element_blank())

ggsave("thres.png", plot = p, width = 8, height = 6, dpi = 1200)

correlations

soil_data <- read_excel("RawdataF.xlsx", sheet = "SOIL_DATA")
head(soil_data)
## # A tibble: 6 × 10
##   Code  Land_Use Replication Depth    pH  clay  silt  sand   som    cu
##   <chr> <chr>          <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 A1    Farm               1 0-20   5.05  5.74  9.75  84.5  9.30 0.762
## 2 B1    Farm               2 0-20   5.08  4.24  1     94.8  6.40 0.179
## 3 C1    Farm               3 0-20   4.29 14.7  29.8   55.5 10.1  0.841
## 4 D1    Farm               4 0-20   4.54 13.2  14.8   72.0  8.93 0.646
## 5 E1    Farm               5 0-20   4.34  7.47 12.8   79.8  9.17 0.411
## 6 F1    Farm               6 0-20   4.31  4.98 12.2   82.8  5.50 0.365

0-20 farm

soil_data |>
  filter(Depth == "0-20", Land_Use == "Farm") |> 
  select(-c(Land_Use, Code, Depth, Replication)) |> 
  correlation::correlation(include_factors = TRUE, method = "pearson")
## # Correlation Matrix (pearson-method)
## 
## Parameter1 | Parameter2 |     r |         95% CI |  t(18) |         p
## ---------------------------------------------------------------------
## pH         |       clay | -0.25 | [-0.62,  0.22] |  -1.08 | 0.897    
## pH         |       silt | -0.33 | [-0.68,  0.13] |  -1.51 | 0.897    
## pH         |       sand |  0.33 | [-0.13,  0.67] |   1.47 | 0.897    
## pH         |        som | -0.13 | [-0.54,  0.33] |  -0.55 | 0.897    
## pH         |         cu |  0.27 | [-0.20,  0.64] |   1.19 | 0.897    
## clay       |       silt |  0.72 | [ 0.41,  0.88] |   4.45 | 0.004**  
## clay       |       sand | -0.86 | [-0.94, -0.67] |  -7.07 | < .001***
## clay       |        som |  0.46 | [ 0.02,  0.75] |   2.19 | 0.294    
## clay       |         cu |  0.31 | [-0.15,  0.66] |   1.40 | 0.897    
## silt       |       sand | -0.98 | [-0.99, -0.94] | -18.84 | < .001***
## silt       |        som |  0.58 | [ 0.19,  0.82] |   3.06 | 0.081    
## silt       |         cu |  0.56 | [ 0.16,  0.81] |   2.89 | 0.097    
## sand       |        som | -0.58 | [-0.81, -0.19] |  -3.04 | 0.081    
## sand       |         cu | -0.52 | [-0.78, -0.10] |  -2.58 | 0.151    
## som        |         cu |  0.56 | [ 0.16,  0.80] |   2.87 | 0.097    
## 
## p-value adjustment method: Holm (1979)
## Observations: 20

20-40 farm

soil_data |>
  filter(Depth == "20-40", Land_Use == "Farm") |> 
  select(-c(Land_Use, Code, Depth, Replication)) |> 
  correlation::correlation(include_factors = TRUE, method = "spearman")
## # Correlation Matrix (spearman-method)
## 
## Parameter1 | Parameter2 |   rho |         95% CI |       S |         p
## ----------------------------------------------------------------------
## pH         |       clay | -0.29 | [-0.66,  0.19] | 1720.00 | 0.838    
## pH         |       silt | -0.10 | [-0.53,  0.37] | 1459.15 | > .999   
## pH         |       sand |  0.16 | [-0.32,  0.57] | 1122.92 | > .999   
## pH         |        som | -0.48 | [-0.77, -0.03] | 1967.24 | 0.358    
## pH         |         cu | -0.41 | [-0.73,  0.06] | 1870.00 | 0.681    
## clay       |       silt |  0.58 | [ 0.16,  0.82] |  565.14 | 0.104    
## clay       |       sand | -0.66 | [-0.86, -0.30] | 2210.33 | 0.021*   
## clay       |        som |  0.33 | [-0.15,  0.68] |  892.84 | 0.804    
## clay       |         cu |  0.40 | [-0.06,  0.73] |  792.00 | 0.681    
## silt       |       sand | -0.98 | [-0.99, -0.96] | 2639.97 | < .001***
## silt       |        som |  0.37 | [-0.10,  0.71] |  832.75 | 0.731    
## silt       |         cu |  0.47 | [ 0.02,  0.76] |  708.30 | 0.377    
## sand       |        som | -0.35 | [-0.69,  0.13] | 1791.35 | 0.804    
## sand       |         cu | -0.49 | [-0.77, -0.05] | 1980.24 | 0.344    
## som        |         cu |  0.14 | [-0.33,  0.56] | 1138.93 | > .999   
## 
## p-value adjustment method: Holm (1979)
## Observations: 20
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

Figures

correlation 0-20

ggpairs(
  soil_data |> 
    filter(Depth == "0-20", Land_Use == "Farm") |> 
    select(-c(Land_Use, Code, Depth, Replication)),
  upper = list(continuous = wrap("cor", method = "pearson"))
)

ggsave("mean_cor_plot_highres.png", plot = p, width = 8, height = 6, dpi = 600)

correlation 20-40

ggpairs(
  soil_data |> 
    filter(Depth == "20-40", Land_Use == "Farm") |> 
    select(-c(Land_Use, Code, Depth, Replication)),
  upper = list(continuous = wrap("cor", method = "spearman"))
)
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties
## Warning in cor.test.default(x, y, method = method, use = use): Cannot compute
## exact p-value with ties

ggsave("mean_cor_plot_highres.png", plot = p, width = 8, height = 6, dpi = 600)