Let’s Conduct (MLR) using our data (Karpur Data-set) to model and predict permeability in multiple scenarios.
1st step: Set data and take a knowledge for it:
data = read.csv("Karpur.csv")
head
## function (x, ...)
## UseMethod("head")
## <bytecode: 0x000001f181572300>
## <environment: namespace:utils>
str(data)
## 'data.frame': 819 obs. of 14 variables:
## $ depth : num 5667 5668 5668 5668 5669 ...
## $ caliper : num 8.69 8.69 8.69 8.69 8.69 ...
## $ ind.deep : num 618 498 385 278 184 ...
## $ ind.med : num 570 419 300 205 131 ...
## $ gamma : num 98.8 90.6 78.1 66.2 59.8 ...
## $ phi.N : num 0.41 0.307 0.203 0.119 0.069 0.048 0.047 0.055 0.066 0.074 ...
## $ R.deep : num 1.62 2.01 2.6 3.59 5.44 ...
## $ R.med : num 1.75 2.38 3.33 4.87 7.62 ...
## $ SP : num -56.6 -61.9 -55.9 -41.9 -34.9 ...
## $ density.corr: num -0.033 -0.067 -0.064 -0.053 -0.054 -0.058 -0.056 -0.046 -0.04 -0.043 ...
## $ density : num 2.21 2.04 1.89 1.79 1.76 ...
## $ phi.core : num 33.9 33.4 33.1 34.9 35.1 ...
## $ k.core : num 2443 3007 3370 2270 2531 ...
## $ Facies : chr "F1" "F1" "F1" "F1" ...
2nd step: Data Pre-processing:
data$log_k_core <- log10(data$k.core)
we will exclude the Facies column and use the lm()
function.
model1 = lm(k.core ~ . - Facies, data = data)
summary(model1)
##
## Call:
## lm(formula = k.core ~ . - Facies, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3929.8 -591.3 -248.1 325.8 9720.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27769.425 14322.155 1.939 0.052861 .
## depth -4.748 1.246 -3.811 0.000149 ***
## caliper -1152.640 916.392 -1.258 0.208828
## ind.deep -8.913 2.028 -4.396 1.25e-05 ***
## ind.med 11.732 2.169 5.410 8.32e-08 ***
## gamma -47.683 4.988 -9.560 < 2e-16 ***
## phi.N 1638.943 1130.272 1.450 0.147436
## R.deep -19.546 5.973 -3.272 0.001113 **
## R.med 52.302 8.437 6.199 9.05e-10 ***
## SP -5.427 2.964 -1.831 0.067500 .
## density.corr -3915.012 4584.927 -0.854 0.393421
## density 3187.818 997.745 3.195 0.001453 **
## phi.core -19.306 23.484 -0.822 0.411262
## log_k_core 2172.192 125.810 17.266 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1232 on 805 degrees of freedom
## Multiple R-squared: 0.701, Adjusted R-squared: 0.6962
## F-statistic: 145.2 on 13 and 805 DF, p-value: < 2.2e-16
# compute Adjusted R2 and RMSE
adj_r2_model1 = summary(model1)$adj.r.squared
pred1 = predict(model1, data)
library(conflicted)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
library(caret)
## Loading required package: lattice
rmse_model1 = RMSE(pred1, data$k.core)
cat("R2:", adj_r2_model1, "\nRMSE:", rmse_model1, "\n")
## R2: 0.6961776
## RMSE: 1221.692
# plot
ggplot(data) +
geom_line(aes(x = depth, y = k.core, color = "Measured")) +
geom_line(aes(x = depth, y = pred1, color = "Predicted")) +
labs(title = "Scenario 1: Measured vs Predicted Permeability with Depth",
x = "Depth",
y = "Permeability") +
scale_color_manual(values = c("Measured" = "blue", "Predicted" = "red")) +
theme_minimal()
Use step()
to perform step-wise elimination,
optimizing the model by selecting the most significant
variables.
library(MASS) #we will use AIC to Selecting the best set of variables that explain permeability without introducing unnecessary variables
step_model1 = stepAIC(model1, direction = "both")
## Start: AIC=11670.89
## k.core ~ (depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.deep + R.med + SP + density.corr + density + phi.core +
## Facies + log_k_core) - Facies
##
## Df Sum of Sq RSS AIC
## - phi.core 1 1026275 1223409959 11670
## - density.corr 1 1107167 1223490851 11670
## - caliper 1 2402353 1224786037 11670
## <none> 1222383684 11671
## - phi.N 1 3192809 1225576493 11671
## - SP 1 5089698 1227473382 11672
## - density 1 15500991 1237884675 11679
## - R.deep 1 16258007 1238641691 11680
## - depth 1 22053180 1244436863 11684
## - ind.deep 1 29347357 1251731041 11688
## - ind.med 1 44441442 1266825126 11698
## - R.med 1 58358678 1280742362 11707
## - gamma 1 138769525 1361153208 11757
## - log_k_core 1 452665230 1675048914 11927
##
## Step: AIC=11669.58
## k.core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.deep + R.med + SP + density.corr + density + log_k_core
##
## Df Sum of Sq RSS AIC
## - density.corr 1 898938 1224308897 11668
## - caliper 1 2065724 1225475683 11669
## - phi.N 1 2341993 1225751952 11669
## <none> 1223409959 11670
## + phi.core 1 1026275 1222383684 11671
## - SP 1 5112390 1228522349 11671
## - R.deep 1 16106739 1239516698 11678
## - depth 1 21925201 1245335160 11682
## - ind.deep 1 30945917 1254355875 11688
## - density 1 37906840 1261316799 11693
## - ind.med 1 45948410 1269358369 11698
## - R.med 1 57945594 1281355553 11706
## - gamma 1 145173843 1368583802 11759
## - log_k_core 1 574764072 1798174031 11983
##
## Step: AIC=11668.18
## k.core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.deep + R.med + SP + density + log_k_core
##
## Df Sum of Sq RSS AIC
## - phi.N 1 1693135 1226002032 11667
## - caliper 1 2179158 1226488055 11668
## <none> 1224308897 11668
## + density.corr 1 898938 1223409959 11670
## + phi.core 1 818046 1223490851 11670
## - SP 1 5378155 1229687052 11670
## - R.deep 1 15788100 1240096997 11677
## - depth 1 22793514 1247102411 11681
## - ind.deep 1 30048477 1254357373 11686
## - density 1 40788710 1265097607 11693
## - ind.med 1 45060367 1269369264 11696
## - R.med 1 57326166 1281635063 11704
## - gamma 1 144422168 1368731065 11758
## - log_k_core 1 575454094 1799762991 11982
##
## Step: AIC=11667.31
## k.core ~ depth + caliper + ind.deep + ind.med + gamma + R.deep +
## R.med + SP + density + log_k_core
##
## Df Sum of Sq RSS AIC
## - caliper 1 2194569 1228196601 11667
## <none> 1226002032 11667
## + phi.N 1 1693135 1224308897 11668
## - SP 1 4676877 1230678909 11668
## + density.corr 1 250080 1225751952 11669
## + phi.core 1 177679 1225824353 11669
## - R.deep 1 16198283 1242200315 11676
## - depth 1 21230154 1247232186 11679
## - ind.deep 1 29351022 1255353054 11685
## - ind.med 1 44084812 1270086844 11694
## - density 1 54278642 1280280673 11701
## - R.med 1 59881943 1285883975 11704
## - gamma 1 153487226 1379489258 11762
## - log_k_core 1 575049736 1801051768 11980
##
## Step: AIC=11666.78
## k.core ~ depth + ind.deep + ind.med + gamma + R.deep + R.med +
## SP + density + log_k_core
##
## Df Sum of Sq RSS AIC
## <none> 1228196601 11667
## + caliper 1 2194569 1226002032 11667
## - SP 1 4263846 1232460447 11668
## + phi.N 1 1708547 1226488055 11668
## + density.corr 1 306967 1227889635 11669
## + phi.core 1 58157 1228138445 11669
## - R.deep 1 15884588 1244081190 11675
## - ind.deep 1 30774989 1258971590 11685
## - depth 1 37691546 1265888147 11690
## - ind.med 1 43902878 1272099479 11694
## - density 1 56382965 1284579566 11702
## - R.med 1 61545101 1289741703 11705
## - gamma 1 182575768 1410772370 11778
## - log_k_core 1 644586481 1872783082 12010
summary(step_model1)
##
## Call:
## lm(formula = k.core ~ depth + ind.deep + ind.med + gamma + R.deep +
## R.med + SP + density + log_k_core, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3894.6 -597.3 -261.0 323.3 9679.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7991.2471 3710.6415 2.154 0.03157 *
## depth -3.1885 0.6399 -4.983 7.67e-07 ***
## ind.deep -8.9097 1.9789 -4.502 7.71e-06 ***
## ind.med 11.4498 2.1292 5.378 9.89e-08 ***
## gamma -43.3949 3.9571 -10.966 < 2e-16 ***
## R.deep -19.2746 5.9588 -3.235 0.00127 **
## R.med 53.2390 8.3617 6.367 3.23e-10 ***
## SP -4.9010 2.9244 -1.676 0.09415 .
## density 3398.5692 557.6759 6.094 1.70e-09 ***
## log_k_core 2157.1923 104.6907 20.605 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1232 on 809 degrees of freedom
## Multiple R-squared: 0.6996, Adjusted R-squared: 0.6962
## F-statistic: 209.3 on 9 and 809 DF, p-value: < 2.2e-16
# compute Adjusted R2 and RMSE
adj_r2_step1 = summary(step_model1)$adj.r.squared
pred_step1 = predict(step_model1, data)
rmse_step1 = RMSE(pred_step1, data$k.core)
cat("R2:", adj_r2_step1, "\nRMSE:", rmse_step1, "\n")
## R2: 0.6962421
## RMSE: 1224.594
# Plot
ggplot(data) +
geom_line(aes(x = depth, y = k.core, color = "Measured")) +
geom_line(aes(x = depth, y = pred_step1, color = "Predicted")) +
labs(title = "Scenario 2: Measured vs Predicted Permeability with Depth",
x = "Depth",
y = "Permeability") +
scale_color_manual(values = c("Measured" = "blue", "Predicted" = "red")) +
theme_minimal()
Use all available variables, including the Facies, to predict permeability.
model2 = lm(k.core ~ ., data = data)
summary(model2)
##
## Call:
## lm(formula = k.core ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4242.0 -489.0 -90.5 358.0 8176.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.364e+04 1.553e+04 -4.098 4.59e-05 ***
## depth 7.217e+00 1.577e+00 4.577 5.47e-06 ***
## caliper 2.237e+03 9.002e+02 2.485 0.01318 *
## ind.deep 1.179e+00 2.078e+00 0.568 0.57052
## ind.med -5.185e-02 2.281e+00 -0.023 0.98187
## gamma -2.952e+01 5.409e+00 -5.458 6.44e-08 ***
## phi.N 1.141e+03 1.318e+03 0.865 0.38714
## R.deep -1.881e+01 5.554e+00 -3.387 0.00074 ***
## R.med 4.855e+01 8.011e+00 6.061 2.09e-09 ***
## SP -4.098e+00 2.781e+00 -1.474 0.14093
## density.corr -5.153e+03 4.245e+03 -1.214 0.22517
## density -5.678e+02 1.052e+03 -0.540 0.58956
## phi.core 2.256e+01 2.303e+01 0.980 0.32760
## FaciesF10 7.516e+02 3.167e+02 2.373 0.01789 *
## FaciesF2 9.573e+02 5.131e+02 1.866 0.06245 .
## FaciesF3 4.942e+02 2.950e+02 1.676 0.09420 .
## FaciesF5 5.455e+02 3.026e+02 1.803 0.07177 .
## FaciesF7 -5.441e+02 5.075e+02 -1.072 0.28402
## FaciesF8 -1.004e+03 3.465e+02 -2.898 0.00386 **
## FaciesF9 -2.332e+03 3.813e+02 -6.115 1.51e-09 ***
## log_k_core 1.787e+03 1.180e+02 15.143 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1113 on 798 degrees of freedom
## Multiple R-squared: 0.7583, Adjusted R-squared: 0.7523
## F-statistic: 125.2 on 20 and 798 DF, p-value: < 2.2e-16
# compute Adjusted R2 and RMSE
adj_r2_model2 = summary(model2)$adj.r.squared
pred2 = predict(model2, data)
rmse_model2 = RMSE(pred2, data$k.core)
cat("R2:", adj_r2_model2, "\nRMSE:", rmse_model2, "\n")
## R2: 0.7522751
## RMSE: 1098.349
# Plot
ggplot(data) +
geom_line(aes(x = depth, y = k.core, color = "Measured")) +
geom_line(aes(x = depth, y = pred2, color = "Predicted")) +
labs(title = "Scenario 3: Measured vs Predicted Permeability with Depth",
x = "Depth",
y = "Permeability") +
scale_color_manual(values = c("Measured" = "blue", "Predicted" = "red")) +
theme_minimal()
step_model2 = stepAIC(model2, direction = "both")
## Start: AIC=11510.56
## k.core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.deep + R.med + SP + density.corr + density + phi.core +
## Facies + log_k_core
##
## Df Sum of Sq RSS AIC
## - ind.med 1 640 988017373 11509
## - density 1 360629 988377363 11509
## - ind.deep 1 398764 988415498 11509
## - phi.N 1 927016 988943749 11509
## - phi.core 1 1188038 989204771 11510
## - density.corr 1 1824201 989840935 11510
## <none> 988016734 11511
## - SP 1 2689265 990705998 11511
## - caliper 1 7642839 995659573 11515
## - R.deep 1 14206052 1002222786 11520
## - depth 1 25937107 1013953841 11530
## - gamma 1 36878856 1024895589 11539
## - R.med 1 45475900 1033492634 11545
## - Facies 7 234366950 1222383684 11671
## - log_k_core 1 283904466 1271921199 11715
##
## Step: AIC=11508.56
## k.core ~ depth + caliper + ind.deep + gamma + phi.N + R.deep +
## R.med + SP + density.corr + density + phi.core + Facies +
## log_k_core
##
## Df Sum of Sq RSS AIC
## - density 1 360349 988377722 11507
## - phi.N 1 929326 988946699 11507
## - phi.core 1 1192422 989209795 11508
## - density.corr 1 1879954 989897327 11508
## <none> 988017373 11509
## - SP 1 2745984 990763357 11509
## + ind.med 1 640 988016734 11511
## - caliper 1 7698182 995715555 11513
## - ind.deep 1 11509302 999526675 11516
## - R.deep 1 14304041 1002321414 11518
## - depth 1 29024573 1017041946 11530
## - gamma 1 37307226 1025324599 11537
## - R.med 1 45946368 1033963741 11544
## - Facies 7 278807753 1266825126 11698
## - log_k_core 1 284260572 1272277945 11714
##
## Step: AIC=11506.86
## k.core ~ depth + caliper + ind.deep + gamma + phi.N + R.deep +
## R.med + SP + density.corr + phi.core + Facies + log_k_core
##
## Df Sum of Sq RSS AIC
## - phi.N 1 667415 989045137 11505
## - phi.core 1 2150934 990528656 11507
## <none> 988377722 11507
## - density.corr 1 2419108 990796830 11507
## - SP 1 2836016 991213738 11507
## + density 1 360349 988017373 11509
## + ind.med 1 359 988377363 11509
## - caliper 1 7381181 995758903 11511
## - ind.deep 1 12002407 1000380129 11515
## - R.deep 1 15266189 1003643911 11517
## - depth 1 28800353 1017178075 11528
## - gamma 1 39978730 1028356452 11537
## - R.med 1 48009204 1036386926 11544
## - Facies 7 300645260 1289022982 11710
## - log_k_core 1 290106503 1278484225 11716
##
## Step: AIC=11505.41
## k.core ~ depth + caliper + ind.deep + gamma + R.deep + R.med +
## SP + density.corr + phi.core + Facies + log_k_core
##
## Df Sum of Sq RSS AIC
## - density.corr 1 1980086 991025223 11505
## <none> 989045137 11505
## - phi.core 1 2593201 991638338 11506
## - SP 1 2888040 991933177 11506
## + phi.N 1 667415 988377722 11507
## + density 1 98438 988946699 11507
## + ind.med 1 2425 989042712 11507
## - caliper 1 8477074 997522211 11510
## - ind.deep 1 12849098 1001894235 11514
## - R.deep 1 14782131 1003827269 11516
## - depth 1 33214988 1022260125 11530
## - R.med 1 47921179 1036966316 11542
## - gamma 1 49626622 1038671759 11544
## - log_k_core 1 290416740 1279461877 11714
## - Facies 7 309797097 1298842234 11715
##
## Step: AIC=11505.05
## k.core ~ depth + caliper + ind.deep + gamma + R.deep + R.med +
## SP + phi.core + Facies + log_k_core
##
## Df Sum of Sq RSS AIC
## <none> 991025223 11505
## + density.corr 1 1980086 989045137 11505
## - SP 1 3207249 994232472 11506
## - phi.core 1 3701082 994726305 11506
## + density 1 509845 990515378 11507
## + phi.N 1 228393 990796830 11507
## + ind.med 1 41311 990983912 11507
## - caliper 1 7512880 998538103 11509
## - ind.deep 1 12790344 1003815567 11514
## - R.deep 1 15273968 1006299191 11516
## - depth 1 31241360 1022266583 11528
## - R.med 1 47912605 1038937828 11542
## - gamma 1 61422901 1052448124 11552
## - log_k_core 1 288472919 1279498142 11712
## - Facies 7 317024707 1308049930 11718
summary(step_model2)
##
## Call:
## lm(formula = k.core ~ depth + caliper + ind.deep + gamma + R.deep +
## R.med + SP + phi.core + Facies + log_k_core, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4233.6 -489.4 -99.7 357.8 8195.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -62116.476 14315.789 -4.339 1.61e-05 ***
## depth 6.921 1.377 5.028 6.11e-07 ***
## caliper 2128.182 863.099 2.466 0.013881 *
## ind.deep 1.181 0.367 3.217 0.001346 **
## gamma -29.600 4.198 -7.050 3.85e-12 ***
## R.deep -19.138 5.444 -3.516 0.000463 ***
## R.med 49.117 7.888 6.227 7.67e-10 ***
## SP -4.424 2.746 -1.611 0.107561
## phi.core 35.175 20.325 1.731 0.083899 .
## FaciesF10 659.370 264.399 2.494 0.012837 *
## FaciesF2 887.674 473.502 1.875 0.061197 .
## FaciesF3 424.256 254.047 1.670 0.095312 .
## FaciesF5 524.676 266.348 1.970 0.049195 *
## FaciesF7 -516.257 460.795 -1.120 0.262895
## FaciesF8 -971.390 265.505 -3.659 0.000270 ***
## FaciesF9 -2325.607 335.365 -6.935 8.39e-12 ***
## log_k_core 1756.411 114.955 15.279 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1112 on 802 degrees of freedom
## Multiple R-squared: 0.7576, Adjusted R-squared: 0.7528
## F-statistic: 156.7 on 16 and 802 DF, p-value: < 2.2e-16
# compute Adjusted R2 and RMSE
adj_r2_step2 = summary(step_model2)$adj.r.squared
pred_step2 = predict(step_model2, data)
rmse_step2 = RMSE(pred_step2, data$k.core)
cat("R2:", adj_r2_step2, "\nRMSE:", rmse_step2, "\n")
## R2: 0.7527601
## RMSE: 1100.02
# Plot
ggplot(data) +
geom_line(aes(x = depth, y = k.core, color = "Measured")) +
geom_line(aes(x = depth, y = pred_step2, color = "Predicted")) +
labs(title = "Scenario 4: Measured vs Predicted Permeability with Depth",
x = "Depth",
y = "Permeability") +
scale_color_manual(values = c("Measured" = "blue", "Predicted" = "red")) +
theme_minimal()
Transform the permeability k.core
into its logarithmic
scale Log10
, Then construct a new MLR model using all
variables, including Facies:
model3 = lm(log_k_core ~ ., data = data)
summary(model3)
##
## Call:
## lm(formula = log_k_core ~ ., data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.51653 -0.08190 0.01884 0.12933 0.62442
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.125e+00 4.142e+00 1.479 0.13959
## depth -3.244e-04 4.220e-04 -0.769 0.44230
## caliper -6.371e-01 2.378e-01 -2.679 0.00753 **
## ind.deep -7.649e-04 5.487e-04 -1.394 0.16373
## ind.med 5.608e-04 6.027e-04 0.931 0.35236
## gamma -3.404e-03 1.451e-03 -2.346 0.01924 *
## phi.N -1.512e+00 3.444e-01 -4.389 1.29e-05 ***
## R.deep 3.391e-04 1.478e-03 0.229 0.81864
## R.med -2.640e-03 2.164e-03 -1.220 0.22286
## SP -8.038e-04 7.354e-04 -1.093 0.27474
## density.corr 1.767e+00 1.121e+00 1.576 0.11548
## density 1.325e+00 2.741e-01 4.834 1.60e-06 ***
## phi.core 7.087e-02 5.551e-03 12.766 < 2e-16 ***
## k.core 1.249e-04 8.247e-06 15.143 < 2e-16 ***
## FaciesF10 -3.276e-02 8.401e-02 -0.390 0.69666
## FaciesF2 -1.339e-01 1.358e-01 -0.985 0.32469
## FaciesF3 -8.561e-02 7.804e-02 -1.097 0.27298
## FaciesF5 1.687e-02 8.014e-02 0.211 0.83332
## FaciesF7 2.863e-01 1.339e-01 2.139 0.03272 *
## FaciesF8 4.958e-02 9.206e-02 0.539 0.59034
## FaciesF9 1.452e-02 1.031e-01 0.141 0.88804
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2941 on 798 degrees of freedom
## Multiple R-squared: 0.7519, Adjusted R-squared: 0.7457
## F-statistic: 120.9 on 20 and 798 DF, p-value: < 2.2e-16
# compute Adjusted R2 and RMSE
adj_r2_model3 = summary(model3)$adj.r.squared
pred3 = predict(model3, data)
rmse_model3 = RMSE(10^pred3, data$k.core)
cat("R2:", adj_r2_model3, "\nRMSE:", rmse_model3, "\n")
## R2: 0.7456542
## RMSE: 3469.467
# Plot
predicted3 <- 10^pred3
ggplot(data) +
geom_line(aes(x = depth, y = k.core, color = "Measured")) +
geom_line(aes(x = depth, y = predicted3, color = "Predicted")) +
labs(title = "Scenario 5: Measured vs Predicted Permeability with Depth",
x = "Depth",
y = "Permeability") +
scale_color_manual(values = c("Measured" = "blue", "Predicted" = "red")) +
theme_minimal()
step_model3 = stepAIC(model3, direction = "both")
## Start: AIC=-1983.88
## log_k_core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.deep + R.med + SP + density.corr + density + phi.core +
## k.core + Facies
##
## Df Sum of Sq RSS AIC
## - Facies 7 0.9845 70.010 -1986.3
## - R.deep 1 0.0046 69.030 -1985.8
## - depth 1 0.0511 69.077 -1985.3
## - ind.med 1 0.0749 69.101 -1985.0
## - SP 1 0.1033 69.129 -1984.7
## - R.med 1 0.1287 69.154 -1984.4
## - ind.deep 1 0.1681 69.194 -1983.9
## <none> 69.026 -1983.9
## - density.corr 1 0.2148 69.240 -1983.3
## - gamma 1 0.4759 69.502 -1980.3
## - caliper 1 0.6209 69.647 -1978.5
## - phi.N 1 1.6663 70.692 -1966.3
## - density 1 2.0216 71.047 -1962.2
## - phi.core 1 14.0977 83.123 -1833.7
## - k.core 1 19.8344 88.860 -1779.0
##
## Step: AIC=-1986.28
## log_k_core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.deep + R.med + SP + density.corr + density + phi.core +
## k.core
##
## Df Sum of Sq RSS AIC
## - R.deep 1 0.0029 70.013 -1988.2
## - SP 1 0.0353 70.045 -1987.9
## - ind.med 1 0.0527 70.063 -1987.7
## - depth 1 0.0864 70.097 -1987.3
## - R.med 1 0.1528 70.163 -1986.5
## - ind.deep 1 0.1581 70.168 -1986.4
## <none> 70.010 -1986.3
## - density.corr 1 0.1915 70.202 -1986.0
## + Facies 7 0.9845 69.026 -1983.9
## - gamma 1 0.9247 70.935 -1977.5
## - caliper 1 1.1696 71.180 -1974.7
## - phi.N 1 2.2434 72.254 -1962.5
## - density 1 2.3311 72.341 -1961.5
## - phi.core 1 16.9538 86.964 -1810.7
## - k.core 1 25.9257 95.936 -1730.3
##
## Step: AIC=-1988.25
## log_k_core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.med + SP + density.corr + density + phi.core + k.core
##
## Df Sum of Sq RSS AIC
## - SP 1 0.0420 70.055 -1989.8
## - ind.med 1 0.0582 70.071 -1989.6
## - depth 1 0.0862 70.099 -1989.2
## - ind.deep 1 0.1671 70.180 -1988.3
## <none> 70.013 -1988.2
## - density.corr 1 0.1896 70.203 -1988.0
## + R.deep 1 0.0029 70.010 -1986.3
## + Facies 7 0.9828 69.030 -1985.8
## - gamma 1 0.9416 70.955 -1979.3
## - R.med 1 0.9727 70.986 -1979.0
## - caliper 1 1.1748 71.188 -1976.6
## - phi.N 1 2.2449 72.258 -1964.4
## - density 1 2.4035 72.416 -1962.6
## - phi.core 1 16.9523 86.965 -1812.7
## - k.core 1 26.3009 96.314 -1729.0
##
## Step: AIC=-1989.76
## log_k_core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.med + density.corr + density + phi.core + k.core
##
## Df Sum of Sq RSS AIC
## - ind.med 1 0.0543 70.109 -1991.1
## - depth 1 0.0831 70.138 -1990.8
## - ind.deep 1 0.1603 70.215 -1989.9
## <none> 70.055 -1989.8
## - density.corr 1 0.1785 70.234 -1989.7
## + SP 1 0.0420 70.013 -1988.2
## + R.deep 1 0.0096 70.045 -1987.9
## + Facies 7 0.9082 69.147 -1986.5
## - gamma 1 0.9074 70.962 -1981.2
## - R.med 1 0.9323 70.987 -1980.9
## - caliper 1 1.1514 71.206 -1978.4
## - phi.N 1 2.3482 72.403 -1964.8
## - density 1 2.3882 72.443 -1964.3
## - phi.core 1 16.9572 87.012 -1814.2
## - k.core 1 26.5342 96.589 -1728.7
##
## Step: AIC=-1991.13
## log_k_core ~ depth + caliper + ind.deep + gamma + phi.N + R.med +
## density.corr + density + phi.core + k.core
##
## Df Sum of Sq RSS AIC
## - depth 1 0.0758 70.185 -1992.2
## <none> 70.109 -1991.1
## - density.corr 1 0.2130 70.322 -1990.6
## + ind.med 1 0.0543 70.055 -1989.8
## + SP 1 0.0381 70.071 -1989.6
## + R.deep 1 0.0172 70.092 -1989.3
## + Facies 7 0.9040 69.205 -1987.8
## - ind.deep 1 0.8204 70.930 -1983.6
## - gamma 1 0.8531 70.962 -1983.2
## - R.med 1 0.9993 71.109 -1981.5
## - caliper 1 1.1570 71.266 -1979.7
## - phi.N 1 2.4295 72.539 -1965.2
## - density 1 2.5137 72.623 -1964.3
## - phi.core 1 16.9117 87.021 -1816.1
## - k.core 1 28.4075 98.517 -1714.5
##
## Step: AIC=-1992.24
## log_k_core ~ caliper + ind.deep + gamma + phi.N + R.med + density.corr +
## density + phi.core + k.core
##
## Df Sum of Sq RSS AIC
## <none> 70.185 -1992.2
## - density.corr 1 0.1945 70.380 -1992.0
## + depth 1 0.0758 70.109 -1991.1
## + ind.med 1 0.0470 70.138 -1990.8
## + SP 1 0.0355 70.150 -1990.7
## + R.deep 1 0.0156 70.170 -1990.4
## + Facies 7 0.9638 69.221 -1989.6
## - R.med 1 0.9436 71.129 -1983.3
## - gamma 1 1.0564 71.241 -1982.0
## - caliper 1 1.9123 72.097 -1972.2
## - ind.deep 1 2.2970 72.482 -1967.9
## - density 1 2.4386 72.624 -1966.3
## - phi.N 1 2.9529 73.138 -1960.5
## - phi.core 1 17.0365 87.222 -1816.3
## - k.core 1 29.7789 99.964 -1704.6
summary(step_model3)
##
## Call:
## lm(formula = log_k_core ~ caliper + ind.deep + gamma + phi.N +
## R.med + density.corr + density + phi.core + k.core, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.52074 -0.08448 0.02219 0.13701 0.60221
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.479e+00 1.313e+00 3.411 0.00068 ***
## caliper -6.332e-01 1.349e-01 -4.695 3.13e-06 ***
## ind.deep -3.419e-04 6.644e-05 -5.146 3.35e-07 ***
## gamma -2.964e-03 8.494e-04 -3.489 0.00051 ***
## phi.N -1.468e+00 2.515e-01 -5.834 7.82e-09 ***
## R.med -2.279e-03 6.911e-04 -3.298 0.00102 **
## density.corr 1.611e+00 1.076e+00 1.497 0.13469
## density 1.207e+00 2.276e-01 5.302 1.48e-07 ***
## phi.core 7.031e-02 5.017e-03 14.013 < 2e-16 ***
## k.core 1.269e-04 6.849e-06 18.527 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2945 on 809 degrees of freedom
## Multiple R-squared: 0.7477, Adjusted R-squared: 0.7449
## F-statistic: 266.4 on 9 and 809 DF, p-value: < 2.2e-16
# compute Adjusted R2 and RMSE
adj_r2_step3 = summary(step_model3)$adj.r.squared
pred_step3 = predict(step_model3, data)
rmse_step3 = RMSE(10^pred_step3, data$k.core)
cat("R2:", adj_r2_step3, "\nRMSE:", rmse_step3, "\n")
## R2: 0.7448982
## RMSE: 3517.557
# Plot
predicted_step3 <- 10^pred3
ggplot(data) +
geom_line(aes(x = depth, y = k.core, color = "Measured")) +
geom_line(aes(x = depth, y = predicted_step3, color = "Predicted")) +
labs(title = "Scenario 6: Measured vs Predicted Permeability with Depth",
x = "Depth",
y = "Permeability") +
scale_color_manual(values = c("Measured" = "blue", "Predicted" = "red")) +
theme_minimal()
A. We will split the data into 70% and 30%.
The 70% portion will be used to develop the equation, and we will test
its performance using the remaining 30%.
set.seed(123)
train_index = createDataPartition(data$log_k_core, p = 0.7, list = FALSE)
train_data = data[train_index, ]
test_data = data[-train_index, ]
B. Train the model using the training set to establish the relationship between features and the target variable, then use the model to predict values for the testing set and evaluate its accuracy by comparing predictions to actual outcomes.
cv_model = lm(log_k_core ~ ., data = train_data)
cv_pred = predict(cv_model, newdata = test_data)
# compute Adjusted R2 and RMSE
cv_adj_r2 = summary(cv_model)$adj.r.squared
cv_rmse = RMSE(10^cv_pred, test_data$k.core)
cat("R2:", cv_adj_r2, "\nRMSE:", cv_rmse, "\n")
## R2: 0.7261524
## RMSE: 2039.096
# Plot
predicted_cv_pred <- 10^cv_pred
ggplot(test_data) +
geom_line(aes(x = depth, y = k.core, color = "Measured")) +
geom_line(aes(x = depth, y = predicted_cv_pred, color = "Predicted")) +
labs(title = "Scenario 7: Measured vs Predicted Permeability with Depth",
x = "Depth",
y = "Permeability") +
scale_color_manual(values = c("Measured" = "blue", "Predicted" = "red")) +
theme_minimal()
step_cv_model = stepAIC(cv_model, direction = "both")
## Start: AIC=-1343.49
## log_k_core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.deep + R.med + SP + density.corr + density + phi.core +
## k.core + Facies
##
## Df Sum of Sq RSS AIC
## - Facies 7 0.8123 52.479 -1348.5
## - R.deep 1 0.0030 51.670 -1345.5
## - R.med 1 0.0744 51.741 -1344.7
## - depth 1 0.1145 51.781 -1344.2
## - ind.med 1 0.1441 51.811 -1343.9
## <none> 51.667 -1343.5
## - density.corr 1 0.2091 51.876 -1343.2
## - SP 1 0.2233 51.890 -1343.0
## - ind.deep 1 0.2273 51.894 -1343.0
## - gamma 1 0.3366 52.004 -1341.8
## - caliper 1 0.4487 52.116 -1340.5
## - density 1 1.0046 52.672 -1334.4
## - phi.N 1 1.3135 52.980 -1331.1
## - phi.core 1 10.4939 62.161 -1239.2
## - k.core 1 13.5538 65.221 -1211.5
##
## Step: AIC=-1348.52
## log_k_core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.deep + R.med + SP + density.corr + density + phi.core +
## k.core
##
## Df Sum of Sq RSS AIC
## - R.deep 1 0.0028 52.482 -1350.5
## - R.med 1 0.0419 52.521 -1350.1
## - depth 1 0.0626 52.542 -1349.8
## - ind.med 1 0.0666 52.546 -1349.8
## - SP 1 0.0950 52.574 -1349.5
## - ind.deep 1 0.1540 52.633 -1348.8
## <none> 52.479 -1348.5
## - density.corr 1 0.1859 52.665 -1348.5
## + Facies 7 0.8123 51.667 -1343.5
## - gamma 1 0.7175 53.197 -1342.7
## - caliper 1 0.7766 53.256 -1342.1
## - phi.N 1 1.4312 53.910 -1335.0
## - density 1 1.4345 53.914 -1335.0
## - phi.core 1 12.1279 64.607 -1231.0
## - k.core 1 17.8945 70.374 -1181.8
##
## Step: AIC=-1350.49
## log_k_core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N +
## R.med + SP + density.corr + density + phi.core + k.core
##
## Df Sum of Sq RSS AIC
## - depth 1 0.0626 52.545 -1351.8
## - ind.med 1 0.0639 52.546 -1351.8
## - SP 1 0.0923 52.574 -1351.5
## - ind.deep 1 0.1512 52.633 -1350.8
## <none> 52.482 -1350.5
## - density.corr 1 0.1909 52.673 -1350.4
## + R.deep 1 0.0028 52.479 -1348.5
## - R.med 1 0.5876 53.070 -1346.1
## + Facies 7 0.8121 51.670 -1345.5
## - gamma 1 0.7148 53.197 -1344.7
## - caliper 1 0.7741 53.256 -1344.1
## - phi.N 1 1.4295 53.912 -1337.0
## - density 1 1.4438 53.926 -1336.9
## - phi.core 1 12.1696 64.652 -1232.6
## - k.core 1 18.1355 70.618 -1181.8
##
## Step: AIC=-1351.81
## log_k_core ~ caliper + ind.deep + ind.med + gamma + phi.N + R.med +
## SP + density.corr + density + phi.core + k.core
##
## Df Sum of Sq RSS AIC
## - ind.med 1 0.0570 52.602 -1353.2
## - SP 1 0.0889 52.634 -1352.8
## - ind.deep 1 0.1676 52.712 -1352.0
## <none> 52.545 -1351.8
## - density.corr 1 0.1857 52.730 -1351.8
## + depth 1 0.0626 52.482 -1350.5
## + R.deep 1 0.0027 52.542 -1349.8
## - R.med 1 0.5518 53.096 -1347.8
## + Facies 7 0.7630 51.782 -1346.2
## - gamma 1 0.8393 53.384 -1344.7
## - caliper 1 1.1556 53.700 -1341.3
## - density 1 1.3820 53.927 -1338.9
## - phi.N 1 1.8055 54.350 -1334.4
## - phi.core 1 12.2366 64.781 -1233.4
## - k.core 1 19.0337 71.578 -1176.1
##
## Step: AIC=-1353.18
## log_k_core ~ caliper + ind.deep + gamma + phi.N + R.med + SP +
## density.corr + density + phi.core + k.core
##
## Df Sum of Sq RSS AIC
## - SP 1 0.0851 52.687 -1354.2
## <none> 52.602 -1353.2
## - density.corr 1 0.2171 52.819 -1352.8
## + ind.med 1 0.0570 52.545 -1351.8
## + depth 1 0.0557 52.546 -1351.8
## + R.deep 1 0.0001 52.602 -1351.2
## - R.med 1 0.6042 53.206 -1348.6
## + Facies 7 0.7373 51.864 -1347.3
## - gamma 1 0.7837 53.385 -1346.7
## - caliper 1 1.2127 53.814 -1342.1
## - ind.deep 1 1.4664 54.068 -1339.4
## - density 1 1.5100 54.112 -1338.9
## - phi.N 1 1.8663 54.468 -1335.1
## - phi.core 1 12.1795 64.781 -1235.4
## - k.core 1 20.5200 73.122 -1165.8
##
## Step: AIC=-1354.25
## log_k_core ~ caliper + ind.deep + gamma + phi.N + R.med + density.corr +
## density + phi.core + k.core
##
## Df Sum of Sq RSS AIC
## <none> 52.687 -1354.2
## - density.corr 1 0.2058 52.893 -1354.0
## + SP 1 0.0851 52.602 -1353.2
## + ind.med 1 0.0533 52.634 -1352.8
## + depth 1 0.0527 52.634 -1352.8
## + R.deep 1 0.0020 52.685 -1352.3
## - R.med 1 0.5493 53.236 -1350.3
## - gamma 1 0.7420 53.429 -1348.2
## + Facies 7 0.6212 52.066 -1347.1
## - caliper 1 1.1789 53.866 -1343.5
## - ind.deep 1 1.4695 54.156 -1340.4
## - density 1 1.4917 54.178 -1340.2
## - phi.N 1 2.0027 54.690 -1334.8
## - phi.core 1 12.1746 64.861 -1236.7
## - k.core 1 20.5846 73.271 -1166.6
cv_step_pred = predict(step_cv_model, test_data)
# compute Adjusted R2 and RMSE
cv_step_adj_r2 <- summary(step_cv_model)$adj.r.squared
cv_step_rmse <- RMSE(10^cv_step_pred, test_data$k.core)
cat("R2:", cv_step_adj_r2, "\nRMSE:", cv_step_rmse, "\n")
## R2: 0.7261841
## RMSE: 2080.903
# Plot
predicted_cv_step_pred <- 10^cv_step_pred
ggplot(test_data) +
geom_line(aes(x = depth, y = k.core, color = "Measured")) +
geom_line(aes(x = depth, y = predicted_cv_step_pred, color = "Predicted")) +
labs(title = "Scenario 7: Measured vs Predicted Permeability with Depth",
x = "Depth",
y = "Permeability") +
scale_color_manual(values = c("Measured" = "blue", "Predicted" = "red")) +
theme_minimal()