if echo = false, then no code will show in output

Packages

Loading required packages:

library(ggplot2)
library(dplyr)
#install.packages("factoextra")
library(readxl)
library(tidyr)
library(janitor)
library(stringr)
library(ggplot2)
library(rstatix)
library(effsize)
library(FactoMineR)
library(factoextra)

Setting global theme

theme_set(theme_bw())

Data

d95 <- read_excel("Hatia_1995.xlsx") %>% clean_names() %>% mutate(year = 1995)
## Warning in warn_micro_mu(string = string, replace = replace): Watch out!  The mu or micro symbol is in the input string, and may have been converted to 'm' while 'u' may have been expected.  Consider adding the following to the `replace` argument:
## The following characters are in the names to clean but are not replaced: \u00b5
d26 <- read_excel("Hatia_2026.xlsx") %>% clean_names() %>% mutate(year = 2026)
## Warning in warn_micro_mu(string = string, replace = replace): Watch out!  The mu or micro symbol is in the input string, and may have been converted to 'm' while 'u' may have been expected.  Consider adding the following to the `replace` argument:
## The following characters are in the names to clean but are not replaced: \u00b5
dim(d95); dim(d26)
## [1] 73 19
## [1] 186  20
head(d95, 3)
## # A tibble: 3 × 19
##   sl_no soil_series land_type soil_texture    ph electrical_conductivity_ds_m
##   <dbl> <chr>       <chr>     <lgl>        <dbl>                        <dbl>
## 1     1 Ramgati     MHL       NA             6.6                          0.1
## 2     2 Ramgati     MHL       NA             7                            0  
## 3     3 Hatia       MHL       NA             6.6                          0.1
## # ℹ 13 more variables: organic_matter_percent <dbl>, total_n_percent <dbl>,
## #   phosphorus_olsen_mg_gram <dbl>, potassium_mg_100g <dbl>,
## #   sulphur_mg_gram <dbl>, zinc_mg_gram <dbl>, boron_mg_gram <dbl>,
## #   calcium_mg_100g <dbl>, magnesium_mg_100g <dbl>, cupper_mg_gram <dbl>,
## #   iron_mg_gram <dbl>, manganese_mg_gram <dbl>, year <dbl>
head(d26, 3)
## # A tibble: 3 × 20
##   sl_no soil_series land_type soil_texture    ph electrical_conductivity_ds_m
##   <dbl> <chr>       <chr>     <chr>        <dbl> <chr>                       
## 1     1 Ramgati     MHL       Loam           7.8 4.7                         
## 2     2 Ramgati     MHL       Loam           7.7 3.7                         
## 3     3 Ramgati     MHL       Loam           7.4 6.2                         
## # ℹ 14 more variables: organic_matter_percent <dbl>, total_n_percent <dbl>,
## #   phosphorus_olsen_mg_gram <chr>, phosphorus_bray_mg_gram <chr>,
## #   potassium_mg_100g <dbl>, sulphur_mg_gram <dbl>, zinc_mg_gram <dbl>,
## #   boron_mg_gram <dbl>, calcium_mg_100g <dbl>, magnesium_mg_100g <dbl>,
## #   cupper_mg_gram <dbl>, iron_mg_gram <dbl>, manganese_mg_gram <dbl>,
## #   year <dbl>
names(d95)
##  [1] "sl_no"                        "soil_series"                 
##  [3] "land_type"                    "soil_texture"                
##  [5] "ph"                           "electrical_conductivity_ds_m"
##  [7] "organic_matter_percent"       "total_n_percent"             
##  [9] "phosphorus_olsen_mg_gram"     "potassium_mg_100g"           
## [11] "sulphur_mg_gram"              "zinc_mg_gram"                
## [13] "boron_mg_gram"                "calcium_mg_100g"             
## [15] "magnesium_mg_100g"            "cupper_mg_gram"              
## [17] "iron_mg_gram"                 "manganese_mg_gram"           
## [19] "year"
names(d26)
##  [1] "sl_no"                        "soil_series"                 
##  [3] "land_type"                    "soil_texture"                
##  [5] "ph"                           "electrical_conductivity_ds_m"
##  [7] "organic_matter_percent"       "total_n_percent"             
##  [9] "phosphorus_olsen_mg_gram"     "phosphorus_bray_mg_gram"     
## [11] "potassium_mg_100g"            "sulphur_mg_gram"             
## [13] "zinc_mg_gram"                 "boron_mg_gram"               
## [15] "calcium_mg_100g"              "magnesium_mg_100g"           
## [17] "cupper_mg_gram"               "iron_mg_gram"                
## [19] "manganese_mg_gram"            "year"
getwd()
## [1] "C:/Users/User/Desktop/Hatia upazila publication/Hatia code"
list.files()
## [1] "Hatia code.Rproj" "Hatia.R"          "Hatia.Rmd"        "Hatia_1995.xlsx" 
## [5] "Hatia_2026.xlsx"  "Hatia_cache"
to_num <- function(x){
  x <- trimws(as.character(x))
  x[x %in% c("", "-", "NA", "N/A")] <- NA
  suppressWarnings(as.numeric(x))
}

num_cols <- c(
  "ph","electrical_conductivity_ds_m","organic_matter_percent","total_n_percent",
  "phosphorus_olsen_mg_gram","phosphorus_bray_mg_gram","potassium_mg_100g",
  "sulphur_mg_gram","zinc_mg_gram","boron_mg_gram","calcium_mg_100g",
  "magnesium_mg_100g","cupper_mg_gram","iron_mg_gram","manganese_mg_gram"
)


d95 <- d95 %>% mutate(across(any_of(num_cols), to_num))
d26 <- d26 %>% mutate(across(any_of(num_cols), to_num))


str(d95$electrical_conductivity_ds_m)
##  num [1:73] 0.1 0 0.1 0 0.1 0.1 0 0.1 0 0 ...
str(d26$electrical_conductivity_ds_m)
##  num [1:186] 4.7 3.7 6.2 4.1 4.3 4.9 6 2.4 6.1 0.6 ...
# Combine data

dat <- bind_rows(d95, d26) %>%
  mutate(
    year = as.factor(year),
    soil_series = as.factor(soil_series),
    land_type = as.factor(land_type),
    soil_texture = as.factor(soil_texture)
  )


dim(dat)
## [1] 259  20
table(dat$year)
## 
## 1995 2026 
##   73  186
str(d26$electrical_conductivity_ds_m)
##  num [1:186] 4.7 3.7 6.2 4.1 4.3 4.9 6 2.4 6.1 0.6 ...
table(dat$year)
## 
## 1995 2026 
##   73  186

Year-wise median & normality test

vars <- c(
  "electrical_conductivity_ds_m","ph","organic_matter_percent","total_n_percent",
  "phosphorus_olsen_mg_gram","potassium_mg_100g","sulphur_mg_gram",
  "zinc_mg_gram","boron_mg_gram","calcium_mg_100g","magnesium_mg_100g",
  "cupper_mg_gram","iron_mg_gram","manganese_mg_gram"
)

summary_year <- dat %>%
  group_by(year) %>%
  summarise(across(all_of(vars), ~ median(.x, na.rm=TRUE)))

summary_year
## # A tibble: 2 × 15
##   year  electrical_conductivity_d…¹    ph organic_matter_percent total_n_percent
##   <fct>                       <dbl> <dbl>                  <dbl>           <dbl>
## 1 1995                          0    7.1                    1.5             0.07
## 2 2026                          5.3  6.45                   1.95            0.1 
## # ℹ abbreviated name: ¹​electrical_conductivity_ds_m
## # ℹ 10 more variables: phosphorus_olsen_mg_gram <dbl>, potassium_mg_100g <dbl>,
## #   sulphur_mg_gram <dbl>, zinc_mg_gram <dbl>, boron_mg_gram <dbl>,
## #   calcium_mg_100g <dbl>, magnesium_mg_100g <dbl>, cupper_mg_gram <dbl>,
## #   iron_mg_gram <dbl>, manganese_mg_gram <dbl>
print(summary_year)
## # A tibble: 2 × 15
##   year  electrical_conductivity_d…¹    ph organic_matter_percent total_n_percent
##   <fct>                       <dbl> <dbl>                  <dbl>           <dbl>
## 1 1995                          0    7.1                    1.5             0.07
## 2 2026                          5.3  6.45                   1.95            0.1 
## # ℹ abbreviated name: ¹​electrical_conductivity_ds_m
## # ℹ 10 more variables: phosphorus_olsen_mg_gram <dbl>, potassium_mg_100g <dbl>,
## #   sulphur_mg_gram <dbl>, zinc_mg_gram <dbl>, boron_mg_gram <dbl>,
## #   calcium_mg_100g <dbl>, magnesium_mg_100g <dbl>, cupper_mg_gram <dbl>,
## #   iron_mg_gram <dbl>, manganese_mg_gram <dbl>
print(summary_year, width = Inf)
## # A tibble: 2 × 15
##   year  electrical_conductivity_ds_m    ph organic_matter_percent
##   <fct>                        <dbl> <dbl>                  <dbl>
## 1 1995                           0    7.1                    1.5 
## 2 2026                           5.3  6.45                   1.95
##   total_n_percent phosphorus_olsen_mg_gram potassium_mg_100g sulphur_mg_gram
##             <dbl>                    <dbl>             <dbl>           <dbl>
## 1            0.07                     23                0.32             70 
## 2            0.1                      11.7              0.25            113.
##   zinc_mg_gram boron_mg_gram calcium_mg_100g magnesium_mg_100g cupper_mg_gram
##          <dbl>         <dbl>           <dbl>             <dbl>          <dbl>
## 1          1            0.3             6.6               4.4            5   
## 2          0.5          0.18            3.78              2.75           4.46
##   iron_mg_gram manganese_mg_gram
##          <dbl>             <dbl>
## 1          106                46
## 2           75                18

EC classification

dat <- dat %>%
  mutate(ec_class = case_when(
    electrical_conductivity_ds_m < 2 ~ "Non-saline (<2)",
    electrical_conductivity_ds_m < 4 ~ "Slight (2–4)",
    electrical_conductivity_ds_m < 8 ~ "Moderate (4–8)",
    TRUE ~ "Strong (>8)"
  ))

ec_tab <- dat %>%
  count(year, ec_class) %>%
  group_by(year) %>%
  mutate(pct = round(100*n/sum(n),1))

ec_tab
## # A tibble: 5 × 4
## # Groups:   year [2]
##   year  ec_class            n   pct
##   <fct> <chr>           <int> <dbl>
## 1 1995  Non-saline (<2)    73 100  
## 2 2026  Moderate (4–8)     95  51.1
## 3 2026  Non-saline (<2)    29  15.6
## 4 2026  Slight (2–4)       24  12.9
## 5 2026  Strong (>8)        38  20.4
ggplot(ec_tab, aes(x=year, y=pct, fill=ec_class)) +
  geom_col() +
  theme_bw() +
  labs(x="Year", y="Samples (%)", fill="EC class")

wilcox.test(electrical_conductivity_ds_m ~ year, data=dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  electrical_conductivity_ds_m by year
## W = 0, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

Series summary

series_summary <- dat %>%
  group_by(year, soil_series) %>%
  summarise(
    med_ec = median(electrical_conductivity_ds_m, na.rm=TRUE),
    med_k  = median(potassium_mg_100g, na.rm=TRUE),
    med_ca = median(calcium_mg_100g, na.rm=TRUE),
    .groups="drop"
  )

series_summary
## # A tibble: 4 × 5
##   year  soil_series med_ec med_k med_ca
##   <fct> <fct>        <dbl> <dbl>  <dbl>
## 1 1995  Hatia         0    0.315   6.6 
## 2 1995  Ramgati       0    0.37    6.8 
## 3 2026  Hatia         5.9  0.28    3.78
## 4 2026  Ramgati       4.65 0.22    3.80
ggplot(dat, aes(x=soil_series, y=electrical_conductivity_ds_m)) +
  geom_boxplot() +
  facet_wrap(~year) +
  theme_bw() +
  labs(x="Soil series", y="EC (dS/m)")
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

P-value of all variable

library(rstatix)

all_tests <- dat %>%
  select(year, all_of(vars)) %>%
  pivot_longer(-year, names_to="variable", values_to="value") %>%
  group_by(variable) %>%
  wilcox_test(value ~ year) %>%
  adjust_pvalue(method = "BH") %>%
  arrange(p.adj)

all_tests
## # A tibble: 14 × 9
##    variable          .y.   group1 group2    n1    n2 statistic        p    p.adj
##    <chr>             <chr> <chr>  <chr>  <int> <int>     <dbl>    <dbl>    <dbl>
##  1 electrical_condu… value 1995   2026      73   185        0  4.29e-36 6.01e-35
##  2 calcium_mg_100g   value 1995   2026      73   186    11846. 1.12e-20 7.84e-20
##  3 magnesium_mg_100g value 1995   2026      73   186    11596. 7.93e-19 3.70e-18
##  4 manganese_mg_gram value 1995   2026      73   186    11496  3.90e-18 1.37e-17
##  5 zinc_mg_gram      value 1995   2026      73   186    11314. 5.98e-17 1.67e-16
##  6 sulphur_mg_gram   value 1995   2026      73   186     2494. 2.39e-15 5.58e-15
##  7 total_n_percent   value 1995   2026      73   186     2650. 1.6 e-14 3.20e-14
##  8 ph                value 1995   2026      73   186    10916. 2.52e-14 4.41e-14
##  9 phosphorus_olsen… value 1995   2026      73    36     2400  2.61e-12 4.06e-12
## 10 potassium_mg_100g value 1995   2026      73   186     9938  6.32e- 9 8.85e- 9
## 11 organic_matter_p… value 1995   2026      73   186     3652  7.31e- 9 9.30e- 9
## 12 boron_mg_gram     value 1995   2026      73   186     9568. 2.92e- 7 3.41e- 7
## 13 iron_mg_gram      value 1995   2026      73   186     8846  1.5 e- 4 1.62e- 4
## 14 cupper_mg_gram    value 1995   2026      73   186     8831  1.66e- 4 1.66e- 4

PCA cluster

pca_vars <- c(
  "electrical_conductivity_ds_m","ph","organic_matter_percent","total_n_percent",
  "potassium_mg_100g","sulphur_mg_gram",
  "zinc_mg_gram","boron_mg_gram","calcium_mg_100g",
  "magnesium_mg_100g","cupper_mg_gram",
  "iron_mg_gram","manganese_mg_gram"
)

pca_df <- dat %>%
  select(year, all_of(pca_vars)) %>%
  na.omit()

library(FactoMineR)
library(factoextra)

pca <- PCA(pca_df[, -1], scale.unit=TRUE, graph=FALSE)

fviz_pca_ind(pca,
             habillage=pca_df$year,
             addEllipses=TRUE,
             title="PCA of topsoil chemistry (1995 vs 2026)")

fviz_pca_var(pca, repel=TRUE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
##   Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

pca$var$coord
##                                   Dim.1       Dim.2       Dim.3       Dim.4
## electrical_conductivity_ds_m -0.6696464 -0.19235578  0.31310803  0.41415935
## ph                            0.5264493 -0.52329106  0.05874481  0.16844819
## organic_matter_percent       -0.4528725  0.76788431  0.18631072 -0.15111161
## total_n_percent              -0.5617058  0.67545444  0.26380393 -0.06934451
## potassium_mg_100g             0.3694817 -0.07699634  0.74277337  0.04546857
## sulphur_mg_gram              -0.4787947 -0.07553709  0.34305319  0.69942923
## zinc_mg_gram                  0.5206378  0.22312491  0.50717293 -0.19312508
## boron_mg_gram                 0.4306020  0.24483822 -0.21406976  0.56791394
## calcium_mg_100g               0.6214037 -0.06053482  0.13277447  0.09873231
## magnesium_mg_100g             0.6377742 -0.03464042  0.38586758 -0.13199362
## cupper_mg_gram                0.3241769  0.64655797  0.08690751  0.08773073
## iron_mg_gram                  0.3890582  0.56022677 -0.32289687  0.25984488
## manganese_mg_gram             0.6753514  0.40967188 -0.15090469  0.22011707
##                                    Dim.5
## electrical_conductivity_ds_m -0.16201551
## ph                           -0.10460917
## organic_matter_percent        0.23703749
## total_n_percent               0.22657893
## potassium_mg_100g            -0.23132582
## sulphur_mg_gram               0.04622416
## zinc_mg_gram                 -0.26179384
## boron_mg_gram                 0.34491122
## calcium_mg_100g               0.56824584
## magnesium_mg_100g             0.39464550
## cupper_mg_gram               -0.29050062
## iron_mg_gram                 -0.25738319
## manganese_mg_gram            -0.19828873
round(pca$var$coord[,1], 3)
## electrical_conductivity_ds_m                           ph 
##                       -0.670                        0.526 
##       organic_matter_percent              total_n_percent 
##                       -0.453                       -0.562 
##            potassium_mg_100g              sulphur_mg_gram 
##                        0.369                       -0.479 
##                 zinc_mg_gram                boron_mg_gram 
##                        0.521                        0.431 
##              calcium_mg_100g            magnesium_mg_100g 
##                        0.621                        0.638 
##               cupper_mg_gram                 iron_mg_gram 
##                        0.324                        0.389 
##            manganese_mg_gram 
##                        0.675

SQTI calculation

# 1) Selected variables
sqi_vars <- c(
  "electrical_conductivity_ds_m",
  "manganese_mg_gram",
  "magnesium_mg_100g",
  "calcium_mg_100g",
  "total_n_percent",
  "ph",
  "zinc_mg_gram"
)

sqi_df <- dat %>% select(year, all_of(sqi_vars)) %>% na.omit()

# 2) Min-Max normalization function
normalize_more_better <- function(x){
  (x - min(x, na.rm=TRUE)) / (max(x, na.rm=TRUE) - min(x, na.rm=TRUE))
}

normalize_less_better <- function(x){
  (max(x, na.rm=TRUE) - x) / (max(x, na.rm=TRUE) - min(x, na.rm=TRUE))
}

# 3) Apply normalization
sqi_df <- sqi_df %>%
  mutate(
    ec_score  = normalize_less_better(electrical_conductivity_ds_m),
    n_score   = normalize_less_better(total_n_percent),
    mn_score  = normalize_more_better(manganese_mg_gram),
    mg_score  = normalize_more_better(magnesium_mg_100g),
    ca_score  = normalize_more_better(calcium_mg_100g),
    ph_score  = normalize_more_better(ph),
    zn_score  = normalize_more_better(zinc_mg_gram)
  )

# 4) Equal weight SQTI
sqi_df$SQTI <- rowMeans(sqi_df[,c("ec_score","n_score","mn_score",
                                  "mg_score","ca_score","ph_score","zn_score")])

# 5) Compare years
sqi_summary <- sqi_df %>%
  group_by(year) %>%
  summarise(mean_SQTI = mean(SQTI),
            median_SQTI = median(SQTI))

sqi_summary
## # A tibble: 2 × 3
##   year  mean_SQTI median_SQTI
##   <fct>     <dbl>       <dbl>
## 1 1995      0.539       0.536
## 2 2026      0.343       0.335
wilcox.test(SQTI ~ year, data=sqi_df)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  SQTI by year
## W = 13489, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0

Fiq-1

library(dplyr)
library(ggplot2)

# Create salinity class
dat$ec_class <- cut(dat$electrical_conductivity_ds_m,
                    breaks = c(-Inf, 2, 4, 8, Inf),
                    labels = c("Non-saline (<2)",
                               "Slight (2–4)",
                               "Moderate (4–8)",
                               "Strong (>8)"))

# Percentage table
ec_tab <- dat %>%
  count(year, ec_class) %>%
  group_by(year) %>%
  mutate(pct = 100 * n / sum(n))

# Plot
fig1 <- ggplot(ec_tab, aes(x = year, y = pct, fill = ec_class)) +
  geom_col() +
  theme_bw() +
  labs(x = "Year",
       y = "Samples (%)",
       fill = "EC class",
       title = "Decadal shift in salinity class distribution")

fig1

fiq-2

library(FactoMineR)
library(factoextra)

pca_vars <- c(
  "electrical_conductivity_ds_m","ph","organic_matter_percent",
  "total_n_percent","potassium_mg_100g","sulphur_mg_gram",
  "zinc_mg_gram","boron_mg_gram","calcium_mg_100g",
  "magnesium_mg_100g","cupper_mg_gram",
  "iron_mg_gram","manganese_mg_gram"
)

pca_df <- dat %>%
  select(year, all_of(pca_vars)) %>%
  na.omit()

pca <- PCA(pca_df[, -1], scale.unit = TRUE, graph = FALSE)

fig2 <- fviz_pca_ind(
  pca,
  habillage = pca_df$year,
  addEllipses = TRUE,
  title = "PCA score plot: 1995 vs 2026"
)

fig2

Fiq-3

fig3 <- fviz_pca_var(
  pca,
  repel = TRUE,
  title = "PCA loading plot (salinity–nutrient gradient)"
)

fig3

Fiq-4

library(ggpubr)

fig4 <- ggplot(sqi_df, aes(x = year, y = SQTI)) +
  geom_boxplot(width = 0.6) +
  stat_summary(fun = mean, geom = "point",
               shape = 23, size = 4, fill = "red") +
  stat_compare_means(method = "wilcox.test",
                     label = "p.format") +
  theme_bw() +
  labs(x = "Year",
       y = "Soil Quality Transition Index (SQTI)",
       title = "Decadal decline in Soil Quality Transition Index")

fig4