# Load necessary libraries and packages
library(ggplot2)
pacman::p_load(rio,psych)
# Load the data
df <- read.csv("C:/Users/hp ZBook/OneDrive/Desktop/karpur.csv")
# Extract permeability data
permeability <- df$k.core
# Create the data frame with permeability in descending order
permeability_sorted <- sort(permeability, decreasing = TRUE)
# Calculate frequency of each unique permeability value
freq <- table(permeability_sorted)
# Calculate the number of samples with larger permeability only (excluding equals)
larger_samples <- rev(cumsum(rev(freq))) - freq  # Subtract the current frequency to exclude equals
# Calculate cumulative frequency distribution
total_samples <- length(permeability)
cumulative_freq <- larger_samples / total_samples
# Create the final data frame
result <- as.data.frame(cbind(
  Permeability =as.numeric(names(freq)),
  Frequency = as.vector(freq),
  LargerSamples = larger_samples,
  CumulativeFreq = cumulative_freq))
# Plot Cumulative Frequency (x-axis) vs Permeability (y-axis)

ggplot(result, aes(x = CumulativeFreq, y = Permeability)) +
  geom_point() +
  geom_line(aes(y = Permeability), color = "blue") +
  scale_y_log10() +  # Log scale for permeability
  ggtitle("Log-normal Permeability Distribution") +
  xlab("Cumulative Frequency") +
  ylab("Permeability (Log Scale)") +
  geom_smooth(method = "lm", se = FALSE, linetype = 2) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# The dashed line represents the best straight line through the data, weighted more towards the central portion.
# Fit a linear regression model: log10(Permeability) vs CumulativeFreq
fit <- lm(log10(Permeability) ~ CumulativeFreq, data = result)
# Print the model summary
summary(fit)
## 
## Call:
## lm(formula = log10(Permeability) ~ CumulativeFreq, data = result)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.54817 -0.09025  0.05636  0.13673  0.18589 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.02168    0.01668   241.1   <2e-16 ***
## CumulativeFreq -1.85253    0.02877   -64.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2359 on 807 degrees of freedom
## Multiple R-squared:  0.8371, Adjusted R-squared:  0.8369 
## F-statistic:  4148 on 1 and 807 DF,  p-value: < 2.2e-16
# Predict permeability for cumulative frequency = 0.5
k50_log <- predict(fit, newdata = data.frame(CumulativeFreq = 0.5))
k50=10^k50_log
print(paste("k50= ",k50))
## [1] "k50=  1245.72065056771"
# Predict permeability for cumulative frequency =0.841
k84.1_log<-predict(fit,data.frame(CumulativeFreq=0.841))
k84.1=10^k84.1_log
print(paste("k84.1= ",k84.1))
## [1] "k84.1=  290.876808568704"
#Calculte reservoir heterogeneity index from Dykstra and Parsons equation
Reservoir_Heterogeneity_Index=(k50-k84.1)/k50
if (Reservoir_Heterogeneity_Index == 0) {
  classification <- "Ideal homogeneous reservoir"
} else if (Reservoir_Heterogeneity_Index > 0 && Reservoir_Heterogeneity_Index <= 0.25) {
  classification <- "Slightly heterogeneous reservoir"
} else if (Reservoir_Heterogeneity_Index > 0.25 && Reservoir_Heterogeneity_Index <= 0.50) {
  classification <- "Heterogeneous reservoir"
} else if (Reservoir_Heterogeneity_Index > 0.50 && Reservoir_Heterogeneity_Index <= 0.75) {
  classification <- "Very heterogeneous reservoir"
} else if (Reservoir_Heterogeneity_Index > 0.75 && Reservoir_Heterogeneity_Index < 1) {
  classification <- "Extremely heterogeneous reservoir"
} else if (Reservoir_Heterogeneity_Index == 1) {
  classification <- "Perfectly heterogeneous reservoir"
} else {
  classification <- "Out of expected range"
}

print(paste("Reservoir_Heterogeneity_Index =", Reservoir_Heterogeneity_Index))
## [1] "Reservoir_Heterogeneity_Index = 0.766499167822141"
print(classification)
## [1] "Extremely heterogeneous reservoir"
#Applying the same steps but on a coorected permeability cpre data
df <- import("C:/Users/hp ZBook/OneDrive/Desktop/karpur.csv")
Corrected.prosity.model <- lm(phi.core ~ phi.N+Facies-1,data=df)
phi.core.corr<-predict(Corrected.prosity.model,data=df)
Corrected.permeability.model <-lm(k.core~phi.core.corr+Facies-1,df)
k.core.corr<-predict(Corrected.permeability.model,data=df)
permeability <- k.core.corr 
permeability_sorted <- sort(permeability, decreasing = TRUE)
freq <- table(permeability_sorted)
larger_samples <- rev(cumsum(rev(freq))) - freq
total_samples <- length(permeability)
cumulative_freq <- larger_samples / total_samples
result <- as.data.frame(cbind(
  Permeability =as.numeric(names(freq)),
  Frequency = as.vector(freq),
  LargerSamples = larger_samples,
  CumulativeFreq = cumulative_freq))
fit <- lm(log10(Permeability) ~ CumulativeFreq, data = result)
summary(fit)
## 
## Call:
## lm(formula = log10(Permeability) ~ CumulativeFreq, data = result)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.54492 -0.04761  0.01362  0.08916  0.14310 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.82630    0.01421  269.24   <2e-16 ***
## CumulativeFreq -1.23154    0.02431  -50.65   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1527 on 435 degrees of freedom
## Multiple R-squared:  0.855,  Adjusted R-squared:  0.8547 
## F-statistic:  2566 on 1 and 435 DF,  p-value: < 2.2e-16
#Calculate reservoir heterogeneity index for corrected core data
K50_log <- predict(fit, newdata = data.frame(CumulativeFreq = 0.5))
K84.1_log<-predict(fit,data.frame(CumulativeFreq=0.841))
K50=10^K50_log
K84.1=10^K84.1_log
Reservoir_Heterogeneity_Index=(K50-K84.1)/K50
if (Reservoir_Heterogeneity_Index == 0) {
  classification <- "Ideal homogeneous reservoir"
} else if (Reservoir_Heterogeneity_Index > 0 && Reservoir_Heterogeneity_Index <= 0.25) {
  classification <- "Slightly heterogeneous reservoir"
} else if (Reservoir_Heterogeneity_Index > 0.25 && Reservoir_Heterogeneity_Index <= 0.50) {
  classification <- "Heterogeneous reservoir"
} else if (Reservoir_Heterogeneity_Index > 0.50 && Reservoir_Heterogeneity_Index <= 0.75) {
  classification <- "Very heterogeneous reservoir"
} else if (Reservoir_Heterogeneity_Index > 0.75 && Reservoir_Heterogeneity_Index < 1) {
  classification <- "Extremely heterogeneous reservoir"
} else if (Reservoir_Heterogeneity_Index == 1) {
  classification <- "Perfectly heterogeneous reservoir"
} else {
  classification <- "Out of expected range"
}

print(paste("Reservoir_Heterogeneity_Index =", Reservoir_Heterogeneity_Index))
## [1] "Reservoir_Heterogeneity_Index = 0.619772360285649"
print(classification)
## [1] "Very heterogeneous reservoir"