# Permeability-variation (heterogeneity) analysis of the core permeability
# column `k.core` in karpur.csv: sort permeability in descending order, plot it
# against the percentage of samples having larger or equal permeability, fit a
# straight line to ln(k), and derive the index (k50 - k84.1) / k50 from the
# fitted line.

# Convert fitted ln(k) values at the 50% and 84.1% points into the
# heterogeneity index (k50 - k84.1) / k50.
#
# The regression below is fitted on ln(k), so its predictions must be
# back-transformed with exp() before the ratio is taken; a ratio of logarithms
# depends on the permeability unit and is not the intended index.
#
# log_k_50: fitted ln(k) at the 50% point.
# log_k_84: fitted ln(k) at the 84.1% point.
# Returns the index as a plain number.
heterogeneity_from_log_k <- function(log_k_50, log_k_84) {
  k_50 <- exp(log_k_50)
  k_84 <- exp(log_k_84)
  (k_50 - k_84) / k_50
}

data <- read.csv("karpur.csv")
# Fail early with a clear message instead of an obscure error further down.
stopifnot("k.core" %in% names(data))
head(data)

# Sort samples from most to least permeable.
data <- data[order(data$k.core, decreasing = TRUE), ]
K <- data$k.core

# Rank of each sample in the sorted order (seq_along is safe for empty input).
sample <- seq_along(K)

# Percentage of samples whose permeability is larger than or equal to K[i].
k_percent <- (sample * 100) / length(K)

# Plot sorted permeability (log-scaled axis) against the cumulative percentage.
xlab <- "Portion of Total Samples Having Larger or Equal K "
ylab <- "Permeability (md)"
plot(
  k_percent, K,
  log = "y", xlab = xlab, ylab = ylab, pch = 10, cex = 0.5, col = "#001c49"
)

# Fit the best straight line through ln(k) versus cumulative percentage.
log_k <- log(K)
model <- lm(log_k ~ k_percent)

# This plot shows ln(k), so the y-axis label must say so.
log_ylab <- "ln(Permeability (md))"
plot(
  k_percent, log_k,
  xlab = xlab, ylab = log_ylab, pch = 10, cex = 0.5, col = "#001c49"
)
abline(model, col = "red", lwd = 2)

# Fitted ln(k) at the 50% and 84.1% points of the cumulative distribution.
new_data <- data.frame(k_percent = c(50, 84.1))
predicted_values <- predict(model, new_data)

# `[[` drops the element names added by predict(), so the result is unnamed.
heterogenity_index <- heterogeneity_from_log_k(
  predicted_values[[1]],
  predicted_values[[2]]
)
heterogenity_index
# NOTE: the previously recorded output (0.2035464) was computed from ln(k)
# values rather than permeabilities and is superseded; re-run the script to
# obtain the corrected index.