data <- read.csv("C:/Users/Haneen/Desktop/haneen/karpur.csv",header=TRUE)
summary(data)
##      depth         caliper         ind.deep          ind.med       
##  Min.   :5667   Min.   :8.487   Min.   :  6.532   Min.   :  9.386  
##  1st Qu.:5769   1st Qu.:8.556   1st Qu.: 28.799   1st Qu.: 27.892  
##  Median :5872   Median :8.588   Median :217.849   Median :254.383  
##  Mean   :5873   Mean   :8.622   Mean   :275.357   Mean   :273.357  
##  3rd Qu.:5977   3rd Qu.:8.686   3rd Qu.:566.793   3rd Qu.:544.232  
##  Max.   :6083   Max.   :8.886   Max.   :769.484   Max.   :746.028  
##      gamma            phi.N            R.deep            R.med        
##  Min.   : 16.74   Min.   :0.0150   Min.   :  1.300   Min.   :  1.340  
##  1st Qu.: 40.89   1st Qu.:0.2030   1st Qu.:  1.764   1st Qu.:  1.837  
##  Median : 51.37   Median :0.2450   Median :  4.590   Median :  3.931  
##  Mean   : 53.42   Mean   :0.2213   Mean   : 24.501   Mean   : 21.196  
##  3rd Qu.: 62.37   3rd Qu.:0.2640   3rd Qu.: 34.724   3rd Qu.: 35.853  
##  Max.   :112.40   Max.   :0.4100   Max.   :153.085   Max.   :106.542  
##        SP          density.corr          density         phi.core    
##  Min.   :-73.95   Min.   :-0.067000   Min.   :1.758   Min.   :15.70  
##  1st Qu.:-42.01   1st Qu.:-0.016000   1st Qu.:2.023   1st Qu.:23.90  
##  Median :-32.25   Median :-0.007000   Median :2.099   Median :27.60  
##  Mean   :-30.98   Mean   :-0.008883   Mean   :2.102   Mean   :26.93  
##  3rd Qu.:-19.48   3rd Qu.: 0.002000   3rd Qu.:2.181   3rd Qu.:30.70  
##  Max.   : 25.13   Max.   : 0.089000   Max.   :2.387   Max.   :36.30  
##      k.core            Facies          phi.core.frac   
##  Min.   :    0.42   Length:819         Min.   :0.1570  
##  1st Qu.:  657.33   Class :character   1st Qu.:0.2390  
##  Median : 1591.22   Mode  :character   Median :0.2760  
##  Mean   : 2251.91                      Mean   :0.2693  
##  3rd Qu.: 3046.82                      3rd Qu.:0.3070  
##  Max.   :15600.00                      Max.   :0.3630
head(data)
##    depth caliper ind.deep ind.med  gamma phi.N R.deep  R.med      SP
## 1 5667.0   8.685  618.005 569.781 98.823 0.410  1.618  1.755 -56.587
## 2 5667.5   8.686  497.547 419.494 90.640 0.307  2.010  2.384 -61.916
## 3 5668.0   8.686  384.935 300.155 78.087 0.203  2.598  3.332 -55.861
## 4 5668.5   8.686  278.324 205.224 66.232 0.119  3.593  4.873 -41.860
## 5 5669.0   8.686  183.743 131.155 59.807 0.069  5.442  7.625 -34.934
## 6 5669.5   8.686  109.512  75.633 57.109 0.048  9.131 13.222 -39.769
##   density.corr density phi.core   k.core Facies phi.core.frac
## 1       -0.033   2.205  33.9000 2442.590     F1      0.339000
## 2       -0.067   2.040  33.4131 3006.989     F1      0.334131
## 3       -0.064   1.888  33.1000 3370.000     F1      0.331000
## 4       -0.053   1.794  34.9000 2270.000     F1      0.349000
## 5       -0.054   1.758  35.0644 2530.758     F1      0.350644
## 6       -0.058   1.759  35.3152 2928.314     F1      0.353152
tail(data)
##      depth caliper ind.deep ind.med  gamma phi.N R.deep R.med      SP
## 814 6080.5   8.578  683.847 672.125 24.003 0.208  1.462 1.488 -33.714
## 815 6081.0   8.590  678.002 669.171 27.855 0.214  1.475 1.494  -4.808
## 816 6081.5   8.588  668.633 661.949 32.591 0.228  1.496 1.511  -4.727
## 817 6082.0   8.588  656.328 647.918 38.547 0.243  1.524 1.543 -21.390
## 818 6082.5   8.588  643.216 628.536 45.555 0.256  1.555 1.591 -31.597
## 819 6083.0   8.588  631.098 608.163 52.244 0.265  1.584 1.644 -36.109
##     density.corr density phi.core   k.core Facies phi.core.frac
## 814        0.003   2.147  26.9000 1300.790     F5      0.269000
## 815       -0.002   2.162  25.7547 1249.706     F5      0.257547
## 816       -0.008   2.158  24.5569 1196.282     F5      0.245569
## 817       -0.006   2.136  23.3592 1142.859     F5      0.233592
## 818       -0.002   2.115  22.1614 1089.435     F5      0.221614
## 819       -0.003   2.109  20.9637 1036.012     F5      0.209637
range(data$depth)
## [1] 5667 6083
par(mfrow=c(1,2))
hist(data$k.core, main='Histogram of Permeability', xlab='Permeability, md', col='red')
hist(data$phi.core, main='Histogram of Porosity', xlab='Porosity', col='blue',xlim=c(0,45))

model1 <- lm(k.core ~ .-1, data = data)
summary(model1)
## 
## Call:
## lm(formula = k.core ~ . - 1, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5585.6  -568.9    49.2   476.5  8928.4 
## 
## Coefficients: (1 not defined because of singularities)
##                 Estimate Std. Error t value Pr(>|t|)    
## depth          8.544e+00  1.785e+00   4.786 2.02e-06 ***
## caliper        1.413e+03  1.019e+03   1.387 0.165789    
## ind.deep      -2.418e-01  2.354e+00  -0.103 0.918220    
## ind.med        1.224e+00  2.585e+00   0.473 0.636062    
## gamma         -4.583e+01  6.010e+00  -7.626 6.88e-14 ***
## phi.N         -2.010e+03  1.476e+03  -1.362 0.173540    
## R.deep        -2.344e+01  6.288e+00  -3.727 0.000207 ***
## R.med          5.643e+01  9.065e+00   6.225 7.76e-10 ***
## SP            -7.125e+00  3.145e+00  -2.266 0.023736 *  
## density.corr  -2.567e+03  4.809e+03  -0.534 0.593602    
## density        2.319e+03  1.173e+03   1.976 0.048458 *  
## phi.core       1.921e+02  2.282e+01   8.418  < 2e-16 ***
## FaciesF1      -6.783e+04  1.760e+04  -3.853 0.000126 ***
## FaciesF10     -6.694e+04  1.765e+04  -3.793 0.000160 ***
## FaciesF2      -6.691e+04  1.761e+04  -3.800 0.000156 ***
## FaciesF3      -6.740e+04  1.767e+04  -3.815 0.000147 ***
## FaciesF5      -6.709e+04  1.767e+04  -3.798 0.000157 ***
## FaciesF7      -6.788e+04  1.764e+04  -3.848 0.000129 ***
## FaciesF8      -6.901e+04  1.770e+04  -3.900 0.000104 ***
## FaciesF9      -7.080e+04  1.779e+04  -3.980 7.52e-05 ***
## phi.core.frac         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1262 on 799 degrees of freedom
## Multiple R-squared:  0.8457, Adjusted R-squared:  0.8418 
## F-statistic: 218.9 on 20 and 799 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model1)

model1.red <- lm(k.core ~ depth+gamma+R.deep+R.med+SP+density+phi.core+Facies-1, data = data)
summary(model1.red)
## 
## Call:
## lm(formula = k.core ~ depth + gamma + R.deep + R.med + SP + density + 
##     phi.core + Facies - 1, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5461.7  -545.5    37.0   505.0  9072.8 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## depth          8.088      1.059   7.639 6.23e-14 ***
## gamma        -51.915      4.526 -11.471  < 2e-16 ***
## R.deep       -22.467      6.261  -3.588 0.000353 ***
## R.med         48.859      8.730   5.597 2.99e-08 ***
## SP            -6.490      3.118  -2.082 0.037671 *  
## density     2013.607   1047.054   1.923 0.054818 .  
## phi.core     188.002     21.791   8.628  < 2e-16 ***
## FaciesF1  -51703.918   5802.150  -8.911  < 2e-16 ***
## FaciesF10 -50893.923   5917.391  -8.601  < 2e-16 ***
## FaciesF2  -51009.565   5854.625  -8.713  < 2e-16 ***
## FaciesF3  -51322.213   5871.731  -8.741  < 2e-16 ***
## FaciesF5  -51174.044   6008.663  -8.517  < 2e-16 ***
## FaciesF7  -52302.690   5971.976  -8.758  < 2e-16 ***
## FaciesF8  -53362.629   6033.789  -8.844  < 2e-16 ***
## FaciesF9  -54796.231   6100.716  -8.982  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1266 on 804 degrees of freedom
## Multiple R-squared:  0.8436, Adjusted R-squared:  0.8407 
## F-statistic: 289.1 on 15 and 804 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model1.red)

k.core.pred <- predict(model1.red,data)
range(data$k.core)
## [1]     0.42 15600.00
AdjR.sq1 <- 1-sum((k.core.pred - data$k.core)^2)/sum((data$k.core-mean(data$k.core))^2)
AdjR.sq1
## [1] 0.6847518
mspe.model1 <- sqrt(sum((k.core.pred - data$k.core)^2)/nrow(data))
mspe.model1
## [1] 1254.46
par(mfrow=c(1,1))
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(data$k.core),type="p", col="green", lwd = 5, pch=16, xlab='Permeability',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
par(new = TRUE)
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred),type="l", col="red", lwd = 5, pch=17, xlab='',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2, main='AdjR.sq1=0.6847 & RMSE=1254')
grid()
legend('topright', legend=c("Observed", "Predicted"), pch=c(16,17), col=c("green", "red"))

model2<-lm(log10(k.core) ~ .-1,data=data)
summary(model2)
## 
## Call:
## lm(formula = log10(k.core) ~ . - 1, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5804 -0.1138  0.0322  0.1529  0.7384 
## 
## Coefficients: (1 not defined because of singularities)
##                 Estimate Std. Error t value Pr(>|t|)    
## depth          0.0007425  0.0004718   1.574   0.1160    
## caliper       -0.4605945  0.2693103  -1.710   0.0876 .  
## ind.deep      -0.0007951  0.0006222  -1.278   0.2017    
## ind.med        0.0007137  0.0006833   1.044   0.2966    
## gamma         -0.0091269  0.0015885  -5.746 1.30e-08 ***
## phi.N         -1.7628155  0.3901024  -4.519 7.16e-06 ***
## R.deep        -0.0025878  0.0016620  -1.557   0.1199    
## R.med          0.0044073  0.0023960   1.839   0.0662 .  
## SP            -0.0016935  0.0008312  -2.037   0.0419 *  
## density.corr   1.4462633  1.2712045   1.138   0.2556    
## density        1.6148374  0.3100921   5.208 2.44e-07 ***
## phi.core       0.0948634  0.0060329  15.724  < 2e-16 ***
## FaciesF1      -2.3461877  4.6532000  -0.504   0.6143    
## FaciesF10     -2.2675417  4.6652182  -0.486   0.6271    
## FaciesF2      -2.3646211  4.6544047  -0.508   0.6116    
## FaciesF3      -2.3769425  4.6697172  -0.509   0.6109    
## FaciesF5      -2.2367684  4.6697432  -0.479   0.6321    
## FaciesF7      -2.0650257  4.6623059  -0.443   0.6579    
## FaciesF8      -2.4438111  4.6776859  -0.522   0.6015    
## FaciesF9      -2.7023993  4.7023453  -0.575   0.5657    
## phi.core.frac         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3335 on 799 degrees of freedom
## Multiple R-squared:  0.9891, Adjusted R-squared:  0.9888 
## F-statistic:  3613 on 20 and 799 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model2)

model2.red <- lm(log10(k.core) ~ caliper+gamma+phi.N+SP+density+phi.core+Facies-1,data=data)
summary(model2.red)
## 
## Call:
## lm(formula = log10(k.core) ~ caliper + gamma + phi.N + SP + density + 
##     phi.core + Facies - 1, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.60108 -0.11891  0.03253  0.15792  0.70925 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## caliper   -0.840136   0.182354  -4.607 4.74e-06 ***
## gamma     -0.010369   0.001337  -7.754 2.68e-14 ***
## phi.N     -1.596764   0.359392  -4.443 1.01e-05 ***
## SP        -0.001585   0.000793  -1.998  0.04600 *  
## density    1.762145   0.287056   6.139 1.31e-09 ***
## phi.core   0.093920   0.005931  15.835  < 2e-16 ***
## FaciesF1   4.981486   1.783281   2.793  0.00534 ** 
## FaciesF10  5.101106   1.793029   2.845  0.00455 ** 
## FaciesF2   4.995149   1.796855   2.780  0.00556 ** 
## FaciesF3   4.967152   1.799799   2.760  0.00591 ** 
## FaciesF5   5.158638   1.760353   2.930  0.00348 ** 
## FaciesF7   5.272425   1.759300   2.997  0.00281 ** 
## FaciesF8   4.952389   1.753512   2.824  0.00486 ** 
## FaciesF9   4.687264   1.762588   2.659  0.00799 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3346 on 805 degrees of freedom
## Multiple R-squared:  0.9889, Adjusted R-squared:  0.9887 
## F-statistic:  5128 on 14 and 805 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model2.red)

k.core.pred2 <- predict(model2.red,data)
k.core.pred2 <- 10^(k.core.pred2)
AdjR.sq2 <- 1-sum((k.core.pred2 - data$k.core)^2)/sum((data$k.core-mean(data$k.core))^2)
AdjR.sq2
## [1] 0.6366977
mspe.model2 <- sqrt(sum((k.core.pred2 - data$k.core)^2)/nrow(data))
mspe.model2
## [1] 1346.681
par(mfrow=c(1,1))
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(data$k.core),type="p", col="red", lwd = 5, pch=16, xlab='Permeability',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
par(new = TRUE)
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred2),type="l", col="blue", lwd = 5, pch=15, xlab='',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2, main='AdjR.sq1=0.6367 & RMSE=1346')
grid()
legend('topright', legend=c("Observed", "Predicted"), pch=c(16,15), col=c("red", "blue"))

require(MASS)
## Loading required package: MASS
library(MASS)
model1 <- lm(k.core ~ .-1,data=data)
bc <- boxcox(model1, lambda=seq(0,1,by=0.1))

lambda <- bc$x[which.max(bc$y)]
lambda
## [1] 0.3333333
k.corebc <- ((data$ k.core^lambda-1)/lambda)
hist(k.corebc)

model3 <- lm(k.corebc ~ .-1,data=data)
summary(model3)
## 
## Call:
## lm(formula = k.corebc ~ . - 1, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.278  -1.469   0.223   2.034   9.186 
## 
## Coefficients: (1 not defined because of singularities)
##                 Estimate Std. Error t value Pr(>|t|)    
## depth         -5.882e-03  5.281e-03  -1.114 0.265724    
## caliper       -8.512e+00  2.976e+00  -2.861 0.004336 ** 
## ind.deep      -5.954e-03  6.866e-03  -0.867 0.386105    
## ind.med        2.620e-03  7.541e-03   0.347 0.728358    
## gamma         -7.033e-02  1.816e-02  -3.874 0.000116 ***
## phi.N         -1.852e+01  4.310e+00  -4.298 1.94e-05 ***
## R.deep         6.668e-03  1.850e-02   0.360 0.718613    
## R.med         -3.482e-02  2.707e-02  -1.286 0.198837    
## SP            -8.001e-03  9.202e-03  -0.869 0.384870    
## density.corr   1.926e+01  1.403e+01   1.372 0.170342    
## density        1.429e+01  3.430e+00   4.166 3.44e-05 ***
## phi.core       9.720e-01  6.946e-02  13.993  < 2e-16 ***
## k.core         4.020e-03  1.032e-04  38.954  < 2e-16 ***
## FaciesF1       8.374e+01  5.182e+01   1.616 0.106523    
## FaciesF10      8.438e+01  5.194e+01   1.624 0.104690    
## FaciesF2       8.323e+01  5.183e+01   1.606 0.108689    
## FaciesF3       8.267e+01  5.200e+01   1.590 0.112254    
## FaciesF5       8.392e+01  5.200e+01   1.614 0.106925    
## FaciesF7       8.756e+01  5.193e+01   1.686 0.092116 .  
## FaciesF8       8.495e+01  5.211e+01   1.630 0.103445    
## FaciesF9       8.330e+01  5.240e+01   1.590 0.112338    
## phi.core.frac         NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.68 on 798 degrees of freedom
## Multiple R-squared:  0.989,  Adjusted R-squared:  0.9887 
## F-statistic:  3402 on 21 and 798 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model3)

model3.red <- lm(k.corebc ~ caliper+gamma+phi.N+density+phi.core+Facies-1,data=data)
summary(model3.red)
## 
## Call:
## lm(formula = k.corebc ~ caliper + gamma + phi.N + density + phi.core + 
##     Facies - 1, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.0549  -3.4595   0.3749   3.5855  20.9714 
## 
## Coefficients:
##            Estimate Std. Error t value Pr(>|t|)    
## caliper   -16.76369    3.49089  -4.802 1.87e-06 ***
## gamma      -0.32095    0.02557 -12.553  < 2e-16 ***
## phi.N     -17.57664    6.88267  -2.554  0.01084 *  
## density    24.66495    5.49378   4.490 8.18e-06 ***
## phi.core    1.78088    0.11360  15.677  < 2e-16 ***
## FaciesF1   98.56843   34.08857   2.892  0.00394 ** 
## FaciesF10 103.38483   34.28718   3.015  0.00265 ** 
## FaciesF2  101.37743   34.35094   2.951  0.00326 ** 
## FaciesF3   99.07487   34.41812   2.879  0.00410 ** 
## FaciesF5  104.26268   33.65904   3.098  0.00202 ** 
## FaciesF7  100.40719   33.66106   2.983  0.00294 ** 
## FaciesF8   96.38629   33.53702   2.874  0.00416 ** 
## FaciesF9   88.74221   33.70903   2.633  0.00864 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.408 on 806 degrees of freedom
## Multiple R-squared:  0.9662, Adjusted R-squared:  0.9656 
## F-statistic:  1771 on 13 and 806 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model3.red)

k.core.pred3 <- predict(model3.red,data)
k.core.pred3 <-(1+(lambda*k.corebc))^(1/lambda)
AdjR.sq3 <- 1-sum((k.core.pred3 - data$k.core)^2)/sum((data$k.core-mean(data$k.core))^2)
AdjR.sq3
## [1] 1
mspe.model3 <- sqrt(sum((k.core.pred3 - data$k.core)^2)/nrow(data))
mspe.model3
## [1] 1.175883e-12
par(mfrow=c(1,1))
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(data$k.core),type="p", col="yellow", lwd = 5, pch=16, xlab='Permeability',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
par(new = TRUE)
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred2),type="l", col="black", lwd = 5, pch=15, xlab='',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2, main='AdjR.sq1=0.9999 & RMSE=0')
grid()
legend('topright', legend=c("Observed", "Predicted"), pch=c(16,15), col=c("yellow", "black"))

par(mfrow=c(1,1))
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(data$k.core),type="p", col="red", lwd = 5, pch=16, xlab='Permeability',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
par(new = TRUE)
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred),type="l", col="blue", lwd = 5, pch=17, xlab='',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
par(new = TRUE)
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred2),type="l", col="green", lwd = 5, pch=15, xlab='',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
par(new = TRUE)
plot(y=y<-(data$depth),ylim=rev(range(data$depth)),x=x<-(k.core.pred3),type="l", col="yellow", lwd = 5, pch=15, xlab='',
     ylab='Depth, m', xlim=c(0,16000), cex=1.5, cex.lab=1.5, cex.axis=1.2)
grid()
legend('topright', legend=c("Observed", "Predicted1", "Predicted2", "Predicted3"), pch=c(16,15,17,11), col=c("red", "blue", "green", "yellow"))