Conduct MLR to model and predict permeability in multiple scenarios

Lubaba Hakeem 29.11.2024


data<-read.csv("C:/Users/HP/Desktop/Rdata/Karpur.csv",header=T)
summary(data)
##      depth         caliper         ind.deep          ind.med       
##  Min.   :5667   Min.   :8.487   Min.   :  6.532   Min.   :  9.386  
##  1st Qu.:5769   1st Qu.:8.556   1st Qu.: 28.799   1st Qu.: 27.892  
##  Median :5872   Median :8.588   Median :217.849   Median :254.383  
##  Mean   :5873   Mean   :8.622   Mean   :275.357   Mean   :273.357  
##  3rd Qu.:5977   3rd Qu.:8.686   3rd Qu.:566.793   3rd Qu.:544.232  
##  Max.   :6083   Max.   :8.886   Max.   :769.484   Max.   :746.028  
##      gamma            phi.N            R.deep            R.med        
##  Min.   : 16.74   Min.   :0.0150   Min.   :  1.300   Min.   :  1.340  
##  1st Qu.: 40.89   1st Qu.:0.2030   1st Qu.:  1.764   1st Qu.:  1.837  
##  Median : 51.37   Median :0.2450   Median :  4.590   Median :  3.931  
##  Mean   : 53.42   Mean   :0.2213   Mean   : 24.501   Mean   : 21.196  
##  3rd Qu.: 62.37   3rd Qu.:0.2640   3rd Qu.: 34.724   3rd Qu.: 35.853  
##  Max.   :112.40   Max.   :0.4100   Max.   :153.085   Max.   :106.542  
##        SP          density.corr          density         phi.core    
##  Min.   :-73.95   Min.   :-0.067000   Min.   :1.758   Min.   :15.70  
##  1st Qu.:-42.01   1st Qu.:-0.016000   1st Qu.:2.023   1st Qu.:23.90  
##  Median :-32.25   Median :-0.007000   Median :2.099   Median :27.60  
##  Mean   :-30.98   Mean   :-0.008883   Mean   :2.102   Mean   :26.93  
##  3rd Qu.:-19.48   3rd Qu.: 0.002000   3rd Qu.:2.181   3rd Qu.:30.70  
##  Max.   : 25.13   Max.   : 0.089000   Max.   :2.387   Max.   :36.30  
##      k.core            Facies         
##  Min.   :    0.42   Length:819        
##  1st Qu.:  657.33   Class :character  
##  Median : 1591.22   Mode  :character  
##  Mean   : 2251.91                     
##  3rd Qu.: 3046.82                     
##  Max.   :15600.00
model1 <- lm(data$phi.core/100 ~ data$phi.N)
summary(model1)
## 
## Call:
## lm(formula = data$phi.core/100 ~ data$phi.N)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.135237 -0.030779  0.009432  0.033563  0.104025 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.30962    0.00485  63.846   <2e-16 ***
## data$phi.N  -0.18207    0.02080  -8.753   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04368 on 817 degrees of freedom
## Multiple R-squared:  0.08573,    Adjusted R-squared:  0.08462 
## F-statistic: 76.61 on 1 and 817 DF,  p-value: < 2.2e-16
plot(data$phi.N,data$phi.core,xlab="phi.log",ylab="phi.core",axes = F)
axis(2,col = "blue",col.axis="black")
axis(1,col = "blue",col.axis="red")
abline(model1, lwd=3, col='green')

par(mfrow=c(1,1))
hist(data$k.core, main='Histogram of Permeability', xlab='Permeability, md', col='darkblue')

model1<-lm(k.core~.-1 ,data=data)
par(mfrow=c(2,2))
plot (model1)

phi.corel <- predict(model1,data)
#cbind(karpur$phi.core/100,phi.corel)
model2<-lm(k.core~phi.corel+Facies-1,data=data)
summary(model2)
## 
## Call:
## lm(formula = k.core ~ phi.corel + Facies - 1, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5585.6  -568.9    49.2   476.5  8928.4 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## phi.corel  1.000e+00  4.690e-02   21.32   <2e-16 ***
## FaciesF1  -2.353e-11  1.617e+02    0.00        1    
## FaciesF10  4.619e-11  1.026e+02    0.00        1    
## FaciesF2   2.128e-11  4.433e+02    0.00        1    
## FaciesF3   1.105e-10  1.766e+02    0.00        1    
## FaciesF5   2.318e-11  2.795e+02    0.00        1    
## FaciesF7  -2.737e-11  4.261e+02    0.00        1    
## FaciesF8  -6.261e-11  1.851e+02    0.00        1    
## FaciesF9  -1.092e-11  1.046e+02    0.00        1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1253 on 810 degrees of freedom
## Multiple R-squared:  0.8457, Adjusted R-squared:  0.844 
## F-statistic: 493.2 on 9 and 810 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model2)

m1.red <- lm(k.core ~ depth+gamma+R.deep+R.med+SP+density+phi.core+Facies-1, data = data)
summary(m1.red)
## 
## Call:
## lm(formula = k.core ~ depth + gamma + R.deep + R.med + SP + density + 
##     phi.core + Facies - 1, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5461.7  -545.5    37.0   505.0  9072.8 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## depth          8.088      1.059   7.639 6.23e-14 ***
## gamma        -51.915      4.526 -11.471  < 2e-16 ***
## R.deep       -22.467      6.261  -3.588 0.000353 ***
## R.med         48.859      8.730   5.597 2.99e-08 ***
## SP            -6.490      3.118  -2.082 0.037671 *  
## density     2013.607   1047.054   1.923 0.054818 .  
## phi.core     188.002     21.791   8.628  < 2e-16 ***
## FaciesF1  -51703.918   5802.150  -8.911  < 2e-16 ***
## FaciesF10 -50893.923   5917.391  -8.601  < 2e-16 ***
## FaciesF2  -51009.565   5854.625  -8.713  < 2e-16 ***
## FaciesF3  -51322.213   5871.731  -8.741  < 2e-16 ***
## FaciesF5  -51174.044   6008.663  -8.517  < 2e-16 ***
## FaciesF7  -52302.690   5971.976  -8.758  < 2e-16 ***
## FaciesF8  -53362.629   6033.789  -8.844  < 2e-16 ***
## FaciesF9  -54796.231   6100.716  -8.982  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1266 on 804 degrees of freedom
## Multiple R-squared:  0.8436, Adjusted R-squared:  0.8407 
## F-statistic: 289.1 on 15 and 804 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(m1.red)

k.core.pred1 <- predict(m1.red,data)
#cbind(data$k.core ,k.core.pred1 )
par(mfrow=c(1,3))
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(data$k.core),type="l", col="darkred", lwd = 5, pch=17, xlab='Measured',
     ylab='Depth,m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred1),type="l", col="green", lwd = 5, pch=17, xlab='Predicted',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2,)
grid()
# Matching the two curves for ease of comparison 
par(mfrow=c(1,1))

plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(data$k.core),type="l", col="darkred", lwd = 5, xlab='Permeability',
                      ylab='Depth,m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
par(new = TRUE)
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred1),type="l", col="blue", lwd = 5, xlab='',
     ylab='Depth,m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2, main='R-sq=8436')
grid()
legend('topright', legend=c("Observed", "Predicted"), lty=c(1,1), col=c("darkred", "blue"))

AdjR.sq1 <- 1-sum((k.core.pred1 - data$k.core))/sum((data$k.core-mean(data$k.core))^2)
AdjR.sq1
## [1] 1
mspe.model1 <- sqrt(sum((k.core.pred1 - data$k.core)^2)/nrow(data))
mspe.model1
## [1] 1254.46
par(mfrow=c(1,1))
                                                                                                         plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(data$k.core),type="p", col="darkred", lwd = 5, pch=16, xlab='Permeability',
                                                                                                              ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
                                                                                                         grid()
                                                                                                         par(new = TRUE)
                                                                                                         plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred1),type="p", col="green", lwd = 5, pch=15, xlab='',
                                                                                                              ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2, main='AdjR.sq1=0.6847 & RMSE=1254')
                                                                                                         grid()
                                                                                                         legend('topright', legend=c("Observed", "Predicted"), pch=c(16,15), col=c("darkred", "green"))

hist(log10(data$k.core) ,main='Histogram of log Permeability', xlab='log Permeability, md', col='darkred')

m2<-lm(log10(k.core) ~ .-1,data=data)
summary(m2)
## 
## Call:
## lm(formula = log10(k.core) ~ . - 1, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5804 -0.1138  0.0322  0.1529  0.7384 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## depth         0.0007425  0.0004718   1.574   0.1160    
## caliper      -0.4605945  0.2693103  -1.710   0.0876 .  
## ind.deep     -0.0007951  0.0006222  -1.278   0.2017    
## ind.med       0.0007137  0.0006833   1.044   0.2966    
## gamma        -0.0091269  0.0015885  -5.746 1.30e-08 ***
## phi.N        -1.7628155  0.3901024  -4.519 7.16e-06 ***
## R.deep       -0.0025878  0.0016620  -1.557   0.1199    
## R.med         0.0044073  0.0023960   1.839   0.0662 .  
## SP           -0.0016935  0.0008312  -2.037   0.0419 *  
## density.corr  1.4462633  1.2712045   1.138   0.2556    
## density       1.6148374  0.3100921   5.208 2.44e-07 ***
## phi.core      0.0948634  0.0060329  15.724  < 2e-16 ***
## FaciesF1     -2.3461877  4.6532000  -0.504   0.6143    
## FaciesF10    -2.2675417  4.6652182  -0.486   0.6271    
## FaciesF2     -2.3646211  4.6544047  -0.508   0.6116    
## FaciesF3     -2.3769425  4.6697172  -0.509   0.6109    
## FaciesF5     -2.2367684  4.6697432  -0.479   0.6321    
## FaciesF7     -2.0650257  4.6623059  -0.443   0.6579    
## FaciesF8     -2.4438111  4.6776859  -0.522   0.6015    
## FaciesF9     -2.7023993  4.7023453  -0.575   0.5657    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3335 on 799 degrees of freedom
## Multiple R-squared:  0.9891, Adjusted R-squared:  0.9888 
## F-statistic:  3613 on 20 and 799 DF,  p-value: < 2.2e-16
# Plot the model (m2)
par(mfrow=c(2,2)) 
plot(m2)

m2.red <- lm(log10(k.core) ~ caliper+gamma+phi.N+SP+density+phi.core+Facies-1,data=data)
summary(m2.red)
## 
## Call:
## lm(formula = log10(k.core) ~ caliper + gamma + phi.N + SP + density + 
##     phi.core + Facies - 1, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.60108 -0.11891  0.03253  0.15792  0.70925 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## caliper   -0.840136   0.182354  -4.607 4.74e-06 ***
## gamma     -0.010369   0.001337  -7.754 2.68e-14 ***
## phi.N     -1.596764   0.359392  -4.443 1.01e-05 ***
## SP        -0.001585   0.000793  -1.998  0.04600 *  
## density    1.762145   0.287056   6.139 1.31e-09 ***
## phi.core   0.093920   0.005931  15.835  < 2e-16 ***
## FaciesF1   4.981486   1.783281   2.793  0.00534 ** 
## FaciesF10  5.101106   1.793029   2.845  0.00455 ** 
## FaciesF2   4.995149   1.796855   2.780  0.00556 ** 
## FaciesF3   4.967152   1.799799   2.760  0.00591 ** 
## FaciesF5   5.158638   1.760353   2.930  0.00348 ** 
## FaciesF7   5.272425   1.759300   2.997  0.00281 ** 
## FaciesF8   4.952389   1.753512   2.824  0.00486 ** 
## FaciesF9   4.687264   1.762588   2.659  0.00799 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3346 on 805 degrees of freedom
## Multiple R-squared:  0.9889, Adjusted R-squared:  0.9887 
## F-statistic:  5128 on 14 and 805 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))  
plot(m2.red)

k.core.pred2 <- predict(m2.red,data)
k.core.pred2 <- 10^(k.core.pred2) 
 # this back transformation to log # cbind(data$k.core ,k.core.pred2 )par(mfrow=c(1,3))
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(data$k.core),type="l", col="darkred", lwd = 5, xlab='Measured',
    ylab='Depth,m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()

                                                                                                         plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred2),type="l", col="yellow", lwd = 5, xlab='Predicted',
   ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2,)
grid()

# Matching the two curves for ease of comparison 
par(mfrow=c(1,1))
 plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(data$k.core),type="l", col="green", lwd = 5, xlab='Permeability',
      ylab='Depth,m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
par(new = TRUE)
                                                                                                         plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred2),type="l", col="yellow", lwd = 5, xlab='',
    ylab='Depth,m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2, main='R-sq=8436')
grid()
legend('topright', legend=c("Observed", "Predicted"), lty=c(1,1), col=c("green", "yellow"))

AdjR.sq2 <- 1-sum((k.core.pred2 - data$k.core)^2)/sum((data$k.core-mean(data$k.core))^2)
AdjR.sq2
## [1] 0.6366977
mspe.model2 <- sqrt(sum((k.core.pred2 - data$k.core)^2)/nrow(data))
mspe.model2
## [1] 1346.681
par(mfrow=c(1,1))
                                                                                                         plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(data$k.core),type="p", col="darkred", lwd = 5, pch=16, xlab='Permeability',
ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
par(new = TRUE)
                                                                                                         plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred2),type="p", col="green", lwd = 5, pch=17, xlab='',
ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2, main='AdjR.sq1=0.6367 & RMSE=1346')
grid()
legend('topright', legend=c("Observed", "Predicted"), pch=c(16,17), col=c("darkred", "green"))