if echo = false, then no code will show in output
Packages
Loading required packages:
library(ggplot2)
library(dplyr)
#install.packages("factoextra")
library(readxl)
library(tidyr)
library(janitor)
library(stringr)
library(ggplot2)
library(rstatix)
library(effsize)
library(FactoMineR)
library(factoextra)
Data
d95 <- read_excel("Hatia_1995.xlsx") %>% clean_names() %>% mutate(year = 1995)
## Warning in warn_micro_mu(string = string, replace = replace): Watch out! The mu or micro symbol is in the input string, and may have been converted to 'm' while 'u' may have been expected. Consider adding the following to the `replace` argument:
## The following characters are in the names to clean but are not replaced: \u00b5
d26 <- read_excel("Hatia_2026.xlsx") %>% clean_names() %>% mutate(year = 2026)
## Warning in warn_micro_mu(string = string, replace = replace): Watch out! The mu or micro symbol is in the input string, and may have been converted to 'm' while 'u' may have been expected. Consider adding the following to the `replace` argument:
## The following characters are in the names to clean but are not replaced: \u00b5
## [1] 73 19
## [1] 186 20
## # A tibble: 3 × 19
## sl_no soil_series land_type soil_texture ph electrical_conductivity_ds_m
## <dbl> <chr> <chr> <lgl> <dbl> <dbl>
## 1 1 Ramgati MHL NA 6.6 0.1
## 2 2 Ramgati MHL NA 7 0
## 3 3 Hatia MHL NA 6.6 0.1
## # ℹ 13 more variables: organic_matter_percent <dbl>, total_n_percent <dbl>,
## # phosphorus_olsen_mg_gram <dbl>, potassium_mg_100g <dbl>,
## # sulphur_mg_gram <dbl>, zinc_mg_gram <dbl>, boron_mg_gram <dbl>,
## # calcium_mg_100g <dbl>, magnesium_mg_100g <dbl>, cupper_mg_gram <dbl>,
## # iron_mg_gram <dbl>, manganese_mg_gram <dbl>, year <dbl>
## # A tibble: 3 × 20
## sl_no soil_series land_type soil_texture ph electrical_conductivity_ds_m
## <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 1 Ramgati MHL Loam 7.8 4.7
## 2 2 Ramgati MHL Loam 7.7 3.7
## 3 3 Ramgati MHL Loam 7.4 6.2
## # ℹ 14 more variables: organic_matter_percent <dbl>, total_n_percent <dbl>,
## # phosphorus_olsen_mg_gram <chr>, phosphorus_bray_mg_gram <chr>,
## # potassium_mg_100g <dbl>, sulphur_mg_gram <dbl>, zinc_mg_gram <dbl>,
## # boron_mg_gram <dbl>, calcium_mg_100g <dbl>, magnesium_mg_100g <dbl>,
## # cupper_mg_gram <dbl>, iron_mg_gram <dbl>, manganese_mg_gram <dbl>,
## # year <dbl>
## [1] "sl_no" "soil_series"
## [3] "land_type" "soil_texture"
## [5] "ph" "electrical_conductivity_ds_m"
## [7] "organic_matter_percent" "total_n_percent"
## [9] "phosphorus_olsen_mg_gram" "potassium_mg_100g"
## [11] "sulphur_mg_gram" "zinc_mg_gram"
## [13] "boron_mg_gram" "calcium_mg_100g"
## [15] "magnesium_mg_100g" "cupper_mg_gram"
## [17] "iron_mg_gram" "manganese_mg_gram"
## [19] "year"
## [1] "sl_no" "soil_series"
## [3] "land_type" "soil_texture"
## [5] "ph" "electrical_conductivity_ds_m"
## [7] "organic_matter_percent" "total_n_percent"
## [9] "phosphorus_olsen_mg_gram" "phosphorus_bray_mg_gram"
## [11] "potassium_mg_100g" "sulphur_mg_gram"
## [13] "zinc_mg_gram" "boron_mg_gram"
## [15] "calcium_mg_100g" "magnesium_mg_100g"
## [17] "cupper_mg_gram" "iron_mg_gram"
## [19] "manganese_mg_gram" "year"
## [1] "C:/Users/User/Desktop/Hatia upazila publication/Hatia code"
## [1] "Hatia code.Rproj" "Hatia.R" "Hatia.Rmd" "Hatia_1995.xlsx"
## [5] "Hatia_2026.xlsx" "Hatia_cache"
to_num <- function(x){
x <- trimws(as.character(x))
x[x %in% c("", "-", "NA", "N/A")] <- NA
suppressWarnings(as.numeric(x))
}
num_cols <- c(
"ph","electrical_conductivity_ds_m","organic_matter_percent","total_n_percent",
"phosphorus_olsen_mg_gram","phosphorus_bray_mg_gram","potassium_mg_100g",
"sulphur_mg_gram","zinc_mg_gram","boron_mg_gram","calcium_mg_100g",
"magnesium_mg_100g","cupper_mg_gram","iron_mg_gram","manganese_mg_gram"
)
d95 <- d95 %>% mutate(across(any_of(num_cols), to_num))
d26 <- d26 %>% mutate(across(any_of(num_cols), to_num))
str(d95$electrical_conductivity_ds_m)
## num [1:73] 0.1 0 0.1 0 0.1 0.1 0 0.1 0 0 ...
str(d26$electrical_conductivity_ds_m)
## num [1:186] 4.7 3.7 6.2 4.1 4.3 4.9 6 2.4 6.1 0.6 ...
# Combine data
dat <- bind_rows(d95, d26) %>%
mutate(
year = as.factor(year),
soil_series = as.factor(soil_series),
land_type = as.factor(land_type),
soil_texture = as.factor(soil_texture)
)
dim(dat)
## [1] 259 20
##
## 1995 2026
## 73 186
str(d26$electrical_conductivity_ds_m)
## num [1:186] 4.7 3.7 6.2 4.1 4.3 4.9 6 2.4 6.1 0.6 ...
##
## 1995 2026
## 73 186
EC classification
dat <- dat %>%
mutate(ec_class = case_when(
electrical_conductivity_ds_m < 2 ~ "Non-saline (<2)",
electrical_conductivity_ds_m < 4 ~ "Slight (2–4)",
electrical_conductivity_ds_m < 8 ~ "Moderate (4–8)",
TRUE ~ "Strong (>8)"
))
ec_tab <- dat %>%
count(year, ec_class) %>%
group_by(year) %>%
mutate(pct = round(100*n/sum(n),1))
ec_tab
## # A tibble: 5 × 4
## # Groups: year [2]
## year ec_class n pct
## <fct> <chr> <int> <dbl>
## 1 1995 Non-saline (<2) 73 100
## 2 2026 Moderate (4–8) 95 51.1
## 3 2026 Non-saline (<2) 29 15.6
## 4 2026 Slight (2–4) 24 12.9
## 5 2026 Strong (>8) 38 20.4
ggplot(ec_tab, aes(x=year, y=pct, fill=ec_class)) +
geom_col() +
theme_bw() +
labs(x="Year", y="Samples (%)", fill="EC class")

wilcox.test(electrical_conductivity_ds_m ~ year, data=dat)
##
## Wilcoxon rank sum test with continuity correction
##
## data: electrical_conductivity_ds_m by year
## W = 0, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
Series summary
series_summary <- dat %>%
group_by(year, soil_series) %>%
summarise(
med_ec = median(electrical_conductivity_ds_m, na.rm=TRUE),
med_k = median(potassium_mg_100g, na.rm=TRUE),
med_ca = median(calcium_mg_100g, na.rm=TRUE),
.groups="drop"
)
series_summary
## # A tibble: 4 × 5
## year soil_series med_ec med_k med_ca
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 1995 Hatia 0 0.315 6.6
## 2 1995 Ramgati 0 0.37 6.8
## 3 2026 Hatia 5.9 0.28 3.78
## 4 2026 Ramgati 4.65 0.22 3.80
ggplot(dat, aes(x=soil_series, y=electrical_conductivity_ds_m)) +
geom_boxplot() +
facet_wrap(~year) +
theme_bw() +
labs(x="Soil series", y="EC (dS/m)")
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).

P-value of all variable
library(rstatix)
all_tests <- dat %>%
select(year, all_of(vars)) %>%
pivot_longer(-year, names_to="variable", values_to="value") %>%
group_by(variable) %>%
wilcox_test(value ~ year) %>%
adjust_pvalue(method = "BH") %>%
arrange(p.adj)
all_tests
## # A tibble: 14 × 9
## variable .y. group1 group2 n1 n2 statistic p p.adj
## <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 electrical_condu… value 1995 2026 73 185 0 4.29e-36 6.01e-35
## 2 calcium_mg_100g value 1995 2026 73 186 11846. 1.12e-20 7.84e-20
## 3 magnesium_mg_100g value 1995 2026 73 186 11596. 7.93e-19 3.70e-18
## 4 manganese_mg_gram value 1995 2026 73 186 11496 3.90e-18 1.37e-17
## 5 zinc_mg_gram value 1995 2026 73 186 11314. 5.98e-17 1.67e-16
## 6 sulphur_mg_gram value 1995 2026 73 186 2494. 2.39e-15 5.58e-15
## 7 total_n_percent value 1995 2026 73 186 2650. 1.6 e-14 3.20e-14
## 8 ph value 1995 2026 73 186 10916. 2.52e-14 4.41e-14
## 9 phosphorus_olsen… value 1995 2026 73 36 2400 2.61e-12 4.06e-12
## 10 potassium_mg_100g value 1995 2026 73 186 9938 6.32e- 9 8.85e- 9
## 11 organic_matter_p… value 1995 2026 73 186 3652 7.31e- 9 9.30e- 9
## 12 boron_mg_gram value 1995 2026 73 186 9568. 2.92e- 7 3.41e- 7
## 13 iron_mg_gram value 1995 2026 73 186 8846 1.5 e- 4 1.62e- 4
## 14 cupper_mg_gram value 1995 2026 73 186 8831 1.66e- 4 1.66e- 4
PCA cluster
pca_vars <- c(
"electrical_conductivity_ds_m","ph","organic_matter_percent","total_n_percent",
"potassium_mg_100g","sulphur_mg_gram",
"zinc_mg_gram","boron_mg_gram","calcium_mg_100g",
"magnesium_mg_100g","cupper_mg_gram",
"iron_mg_gram","manganese_mg_gram"
)
pca_df <- dat %>%
select(year, all_of(pca_vars)) %>%
na.omit()
library(FactoMineR)
library(factoextra)
pca <- PCA(pca_df[, -1], scale.unit=TRUE, graph=FALSE)
fviz_pca_ind(pca,
habillage=pca_df$year,
addEllipses=TRUE,
title="PCA of topsoil chemistry (1995 vs 2026)")

fviz_pca_var(pca, repel=TRUE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
## Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Dim.1 Dim.2 Dim.3 Dim.4
## electrical_conductivity_ds_m -0.6696464 -0.19235578 0.31310803 0.41415935
## ph 0.5264493 -0.52329106 0.05874481 0.16844819
## organic_matter_percent -0.4528725 0.76788431 0.18631072 -0.15111161
## total_n_percent -0.5617058 0.67545444 0.26380393 -0.06934451
## potassium_mg_100g 0.3694817 -0.07699634 0.74277337 0.04546857
## sulphur_mg_gram -0.4787947 -0.07553709 0.34305319 0.69942923
## zinc_mg_gram 0.5206378 0.22312491 0.50717293 -0.19312508
## boron_mg_gram 0.4306020 0.24483822 -0.21406976 0.56791394
## calcium_mg_100g 0.6214037 -0.06053482 0.13277447 0.09873231
## magnesium_mg_100g 0.6377742 -0.03464042 0.38586758 -0.13199362
## cupper_mg_gram 0.3241769 0.64655797 0.08690751 0.08773073
## iron_mg_gram 0.3890582 0.56022677 -0.32289687 0.25984488
## manganese_mg_gram 0.6753514 0.40967188 -0.15090469 0.22011707
## Dim.5
## electrical_conductivity_ds_m -0.16201551
## ph -0.10460917
## organic_matter_percent 0.23703749
## total_n_percent 0.22657893
## potassium_mg_100g -0.23132582
## sulphur_mg_gram 0.04622416
## zinc_mg_gram -0.26179384
## boron_mg_gram 0.34491122
## calcium_mg_100g 0.56824584
## magnesium_mg_100g 0.39464550
## cupper_mg_gram -0.29050062
## iron_mg_gram -0.25738319
## manganese_mg_gram -0.19828873
round(pca$var$coord[,1], 3)
## electrical_conductivity_ds_m ph
## -0.670 0.526
## organic_matter_percent total_n_percent
## -0.453 -0.562
## potassium_mg_100g sulphur_mg_gram
## 0.369 -0.479
## zinc_mg_gram boron_mg_gram
## 0.521 0.431
## calcium_mg_100g magnesium_mg_100g
## 0.621 0.638
## cupper_mg_gram iron_mg_gram
## 0.324 0.389
## manganese_mg_gram
## 0.675
SQTI calculation
# 1) Selected variables
sqi_vars <- c(
"electrical_conductivity_ds_m",
"manganese_mg_gram",
"magnesium_mg_100g",
"calcium_mg_100g",
"total_n_percent",
"ph",
"zinc_mg_gram"
)
sqi_df <- dat %>% select(year, all_of(sqi_vars)) %>% na.omit()
# 2) Min-Max normalization function
normalize_more_better <- function(x){
(x - min(x, na.rm=TRUE)) / (max(x, na.rm=TRUE) - min(x, na.rm=TRUE))
}
normalize_less_better <- function(x){
(max(x, na.rm=TRUE) - x) / (max(x, na.rm=TRUE) - min(x, na.rm=TRUE))
}
# 3) Apply normalization
sqi_df <- sqi_df %>%
mutate(
ec_score = normalize_less_better(electrical_conductivity_ds_m),
n_score = normalize_less_better(total_n_percent),
mn_score = normalize_more_better(manganese_mg_gram),
mg_score = normalize_more_better(magnesium_mg_100g),
ca_score = normalize_more_better(calcium_mg_100g),
ph_score = normalize_more_better(ph),
zn_score = normalize_more_better(zinc_mg_gram)
)
# 4) Equal weight SQTI
sqi_df$SQTI <- rowMeans(sqi_df[,c("ec_score","n_score","mn_score",
"mg_score","ca_score","ph_score","zn_score")])
# 5) Compare years
sqi_summary <- sqi_df %>%
group_by(year) %>%
summarise(mean_SQTI = mean(SQTI),
median_SQTI = median(SQTI))
sqi_summary
## # A tibble: 2 × 3
## year mean_SQTI median_SQTI
## <fct> <dbl> <dbl>
## 1 1995 0.539 0.536
## 2 2026 0.343 0.335
wilcox.test(SQTI ~ year, data=sqi_df)
##
## Wilcoxon rank sum test with continuity correction
##
## data: SQTI by year
## W = 13489, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
Fiq-1
library(dplyr)
library(ggplot2)
# Create salinity class
dat$ec_class <- cut(dat$electrical_conductivity_ds_m,
breaks = c(-Inf, 2, 4, 8, Inf),
labels = c("Non-saline (<2)",
"Slight (2–4)",
"Moderate (4–8)",
"Strong (>8)"))
# Percentage table
ec_tab <- dat %>%
count(year, ec_class) %>%
group_by(year) %>%
mutate(pct = 100 * n / sum(n))
# Plot
fig1 <- ggplot(ec_tab, aes(x = year, y = pct, fill = ec_class)) +
geom_col() +
theme_bw() +
labs(x = "Year",
y = "Samples (%)",
fill = "EC class",
title = "Decadal shift in salinity class distribution")
fig1

fiq-2
library(FactoMineR)
library(factoextra)
pca_vars <- c(
"electrical_conductivity_ds_m","ph","organic_matter_percent",
"total_n_percent","potassium_mg_100g","sulphur_mg_gram",
"zinc_mg_gram","boron_mg_gram","calcium_mg_100g",
"magnesium_mg_100g","cupper_mg_gram",
"iron_mg_gram","manganese_mg_gram"
)
pca_df <- dat %>%
select(year, all_of(pca_vars)) %>%
na.omit()
pca <- PCA(pca_df[, -1], scale.unit = TRUE, graph = FALSE)
fig2 <- fviz_pca_ind(
pca,
habillage = pca_df$year,
addEllipses = TRUE,
title = "PCA score plot: 1995 vs 2026"
)
fig2

Fiq-3
fig3 <- fviz_pca_var(
pca,
repel = TRUE,
title = "PCA loading plot (salinity–nutrient gradient)"
)
fig3

Fiq-4
library(ggpubr)
fig4 <- ggplot(sqi_df, aes(x = year, y = SQTI)) +
geom_boxplot(width = 0.6) +
stat_summary(fun = mean, geom = "point",
shape = 23, size = 4, fill = "red") +
stat_compare_means(method = "wilcox.test",
label = "p.format") +
theme_bw() +
labs(x = "Year",
y = "Soil Quality Transition Index (SQTI)",
title = "Decadal decline in Soil Quality Transition Index")
fig4
