Dykstra-Parsons coefficient

first, let’s import the data

data<-read.csv("C:/Users/dell/Downloads/Telegram Desktop/karpur.csv")
head(data)
k_core = data$k.core
summary(k_core)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     0.42   657.33  1591.22  2251.91  3046.82 15600.00

let’s take the permeability and sort in descending order

sorted_k = sort(k_core, decreasing = TRUE)
head(sorted_k)
## [1] 15600.00 14225.31 13544.98 13033.53 11841.74 11117.40

for each permeability, calculate the percent of the samples with permeability greater than that value.

n = length(sorted_k) 
k_percent = c(1:n)/(n+1) 
head(k_percent)
## [1] 0.001219512 0.002439024 0.003658537 0.004878049 0.006097561 0.007317073

polt the data on a log-normal probability graph

plot(
  x = k_percent, 
  y = sorted_k,
  type="l",
  xlab = "% >= k",
  ylab = "k",
  log='y',
  lwd=2
)

let’s fit a straight line to this portion of the data

nearly_linear_k_values = sorted_k[floor(0.1*n): floor(0.9*n)]
nearly_linear_k_percent = k_percent[floor(0.1*n): floor(0.9*n)]
linear_model = lm(log10(nearly_linear_k_values) ~ nearly_linear_k_percent)
summary(linear_model)
## 
## Call:
## lm(formula = log10(nearly_linear_k_values) ~ nearly_linear_k_percent)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19128 -0.03880  0.02148  0.04110  0.07176 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.885895   0.005010   775.7   <2e-16 ***
## nearly_linear_k_percent -1.461904   0.009112  -160.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05402 on 655 degrees of freedom
## Multiple R-squared:  0.9752, Adjusted R-squared:  0.9751 
## F-statistic: 2.574e+04 on 1 and 655 DF,  p-value: < 2.2e-16
plot(
  x = k_percent, 
  y = sorted_k,
  type="l",
  xlab = "% >= k",
  ylab = "k",
  log='y',
  lwd=2
)
abline(linear_model, col="red", lwd=2)

The Dykstra-Parsons coefficient of permeability variation, V, is determined from the graph as:

V=(k50−k84.1)/k50

log_k50_and_84.1 = predict(
  linear_model, 
  data.frame(nearly_linear_k_percent=c(0.50, 0.841))
  )
print(10^log_k50_and_84.1) 
##         1         2 
## 1428.7056  453.3498
k50   = 10^log_k50_and_84.1[1]
k84.1 = 10^log_k50_and_84.1[2]
permeability_variation = (k50 - k84.1)/k50
print(permeability_variation)
##        1 
## 0.682685