In the Modeling groundwater nitrate concentrations in private wells in Iowa paper, they defined slope length as the average length of sloping land within 1 km of a well.So the method in the paper uses: AvgFlowpathLength_1km: Mean flowpath length within a 1 km buffer around each well

#Slope length in this context is the average length of sloping land within 1 km of a well.

cat("\014"); rm(list = ls())
library(dataRetrieval)
library(sf)
library(leaflet)
library(dplyr)
library(tidyr)
library(purrr)
library(tibble)
library(ggplot2)
library(base64enc)
library(lubridate)
library(patchwork)
library(grid)
library(stringr)
library(wql)
library(measurements)
library(tidyverse)
library(openair)
library(heatwaveR)
library(data.table)
require(methods)
library(investr)
library(gridExtra)
library(Metrics)
library(scales)
library(cluster)
library(factoextra)
library(dendextend)
library(elevatr)
library(raster)
library(terra)
library(whitebox)


working_dir <- "C:/Users/a905h226/OneDrive - University of Kansas/Documents"
setwd(working_dir)

Sys.setenv(R_WHITEBOX_EXE_PATH = "C:/WhiteboxTools/whitebox_tools.exe")
wbt_init()
wbt_version()






file_Path_Variable_O<- "C:/Users/a905h226/OneDrive - University of Kansas/Desktop/KGS project GW/Step By Step Code/Output"
file_Path_Variable_I <- "C:/Users/a905h226/OneDrive - University of Kansas/Desktop/KGS project GW/Step By Step Code/Input"

GMD_shapefile_path <- file.path(file_Path_Variable_I, "Groundwater_Management_Districts_(GMD)/Groundwater_Management_Districts_(GMD).shp")
GMD_shp <- st_read(GMD_shapefile_path)
Reading layer `Groundwater_Management_Districts_(GMD)' from data source 
  `C:\Users\a905h226\OneDrive - University of Kansas\Desktop\KGS project GW\Step By Step Code\Input\Groundwater_Management_Districts_(GMD)\Groundwater_Management_Districts_(GMD).shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 5 features and 13 fields
Geometry type: POLYGON
Dimension:     XY
Bounding box:  xmin: -11360320 ymin: 4438158 xmax: -10835300 ymax: 4847409
Projected CRS: WGS 84 / Pseudo-Mercator
GMD_shp <- st_transform(GMD_shp, crs = 4326)
cities_shapefile_path <- file.path(file_Path_Variable_I, "Kansas state city shapefile/Census_2021_Incorporated_Areas.shp")
cities <- st_read(cities_shapefile_path)
Reading layer `Census_2021_Incorporated_Areas' from data source 
  `C:\Users\a905h226\OneDrive - University of Kansas\Desktop\KGS project GW\Step By Step Code\Input\Kansas state city shapefile\Census_2021_Incorporated_Areas.shp' 
  using driver `ESRI Shapefile'
Simple feature collection with 740 features and 16 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -11359630 ymin: 4438216 xmax: -10529530 ymax: 4866273
Projected CRS: WGS 84 / Pseudo-Mercator
cities <- st_transform(cities, crs = 4326)

StationDataWD<- readRDS(file.path(file_Path_Variable_O, "StationData_SeperateDF_depthAdded_allfocusClean_step2.rds"))
StationData_withLocation<- readRDS(file.path(file_Path_Variable_O, "StationData_withLocation.rds"))
StationDataWOD<- readRDS(file.path(file_Path_Variable_O, "StationData_SeperateDF.rds"))




FinalData_cleaned<-StationDataWD
FinalData_cleaned<- FinalData_cleaned %>% left_join(StationData_withLocation, by='Well_ID')
FinalData_cleaned<- na.omit(FinalData_cleaned)

wbt_init()
wbt_version()



GMD2 <- GMD_shp %>% 
  filter(GMD_ == 2) %>% 
  st_transform(4326)  
#Retrieved elevation data for GMD2
dem <- get_elev_raster(locations = GMD2, z = 10, clip = "locations") #digital elevation
plot(dem)

wbt_fill_depressions(
  dem = "gmd2_dem.tif",
  output = "filled_dem_Updated.tif"
)

stopifnot(file.exists("filled_dem_Updated.tif"))

#Filled depressions in DEM using WhiteboxTools
if ("wbt_average_upslope_flowpath_length" %in% ls("package:whitebox")) {
  wbt_average_upslope_flowpath_length(
    dem = "filled_dem_Updated.tif",
    output = "avg_upslope_length.tif"
  )
} else {
  stop("Function `wbt_average_upslope_flowpath_length()` not found in whitebox R package.")
}

avg_flowpath_rast <- rast("avg_upslope_length.tif")


well_points <- FinalData_cleaned %>%
  st_as_sf(coords = c("Lon", "Lat"), crs = 4326) 

well_points <- st_transform(well_points, crs(avg_flowpath_rast))


buffers <- st_buffer(well_points, dist = 1000) %>%
  st_transform(crs(avg_flowpath_rast))

buffer_mean_vals <- terra::extract(avg_flowpath_rast, vect(buffers), fun = mean, na.rm = TRUE)

FinalData_cleaned$AvgFlowpathLength_1km <- buffer_mean_vals[, 2]

head(FinalData_cleaned %>% dplyr::select(Well_ID, AvgFlowpathLength_1km))
# A tibble: 6 × 2
# Groups:   Well_ID [6]
  Well_ID AvgFlowpathLength_1km
  <chr>                   <dbl>
1 EB244A               0.000902
2 P26A                 0.00125 
3 P27A                 0.000975
4 P28A                 0.00109 
5 P35A                 0.00172 
6 P29A                 0.000960
FinalData_cleaned <- FinalData_cleaned %>%
  mutate(
    mean_nitrate = map_dbl(cleaned_nitrate_data, ~ mean(.x$nitrate_values, na.rm = TRUE))
  )

well_sf <- FinalData_cleaned %>%
  st_as_sf(coords = c("Lon", "Lat"), crs = 4326)
ggplot() +
  geom_sf(data = GMD2, fill = NA, color = "gray40") +
  geom_sf(data = well_sf, aes(color = AvgFlowpathLength_1km, size = mean_nitrate), alpha = 0.8) +
  scale_color_viridis_c(name = "Slope Length (m)", option = "C") +
  scale_size_continuous(name = "Mean Nitrate (mg/L)", range = c(1, 6)) +
  theme_minimal() +
  labs(
    title = "Mean Nitrate vs. Slope Length (1 km Buffer)",
    subtitle = "Size = Nitrate, Color = Slope Length",
    caption = "Larger circles indicate higher nitrate levels"
  )

ggplot(FinalData_cleaned, aes(x = AvgFlowpathLength_1km, y = mean_nitrate)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", color = "red") +
  scale_x_log10() +
  labs(x = "Avg. Flowpath Length (log scale, m)", y = "Mean Nitrate (mg/L)",
       title = "Slope Length vs. Nitrate (Log Scale)")

summary(lm(mean_nitrate ~ log10(AvgFlowpathLength_1km), data = FinalData_cleaned))

Call:
lm(formula = mean_nitrate ~ log10(AvgFlowpathLength_1km), data = FinalData_cleaned)

Residuals:
   Min     1Q Median     3Q    Max 
-4.199 -2.962 -1.866  1.028 22.008 

Coefficients:
                             Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   -17.528      4.805  -3.648 0.000291 ***
log10(AvgFlowpathLength_1km)   -6.840      1.597  -4.282 2.22e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.537 on 509 degrees of freedom
  (51 observations deleted due to missingness)
Multiple R-squared:  0.03477,   Adjusted R-squared:  0.03287 
F-statistic: 18.33 on 1 and 509 DF,  p-value: 2.215e-05

Conclusion

We found a significant negative relationship between average upslope flowpath length and mean nitrate concentrations. Wells in flatter areas (with shorter slope lengths) tend to have higher nitrate levels. However, slope length alone only explains a small portion of nitrate variability