Load the dataset

# Read the Karpur CSV into the data frame used by every scenario below.
# NOTE(review): absolute, machine-specific path - confirm the file location
# before running this on another machine.
data <- read.csv("C:/karpur.csv")
library(caret)
## Warning: package 'caret' was built under R version 4.4.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.2
## Loading required package: lattice

Scenario 1: MLR of Permeability (k.core) using all well logs without Facies

# Least-squares fit of k.core on every other column of `data` (the `.`),
# with Facies removed from the predictor set.
model1 <- lm(k.core ~ . - Facies, data = data)
# Print the coefficient table and overall fit statistics.
summary(model1)
## 
## Call:
## lm(formula = k.core ~ . - Facies, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5549.5  -755.5  -178.1   578.0 11260.8 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  60762.728  16605.360   3.659 0.000269 ***
## depth           -7.398      1.446  -5.115 3.92e-07 ***
## caliper      -3955.952   1055.105  -3.749 0.000190 ***
## ind.deep       -14.183      2.345  -6.048 2.24e-09 ***
## ind.med         17.300      2.509   6.896 1.08e-11 ***
## gamma          -77.487      5.475 -14.153  < 2e-16 ***
## phi.N        -1784.704   1301.772  -1.371 0.170763    
## R.deep         -26.007      6.974  -3.729 0.000206 ***
## R.med           63.525      9.841   6.455 1.86e-10 ***
## SP              -8.784      3.460  -2.539 0.011313 *  
## density.corr  -523.060   5358.876  -0.098 0.922269    
## density       8011.106   1120.554   7.149 1.96e-12 ***
## phi.core     18320.336   2380.161   7.697 4.07e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1442 on 806 degrees of freedom
## Multiple R-squared:  0.5903, Adjusted R-squared:  0.5842 
## F-statistic: 96.77 on 12 and 806 DF,  p-value: < 2.2e-16
# Pull both R-squared values from a single summary() call.
fit_stats1 <- summary(model1)
r2_1 <- fit_stats1$r.squared
adj_r2_1 <- fit_stats1$adj.r.squared
cat("Scenario 1 - R2:", r2_1, "Adjusted R2:", adj_r2_1, "\n")
## Scenario 1 - R2: 0.5902845 Adjusted R2: 0.5841845
# predict() reads its prediction frame from `newdata`; an argument named
# `data` is silently ignored and the call only returns the fitted values.
k.pred1 <- predict(model1, newdata = data)
# Predicted versus measured permeability.
plot(k.pred1, data$k.core)

# Root-mean-square error of the predictions against the measured k.core.
rmse_1 <- RMSE(k.pred1, data$k.core)
rmse_1
## [1] 1430.118

Scenario 2: Apply Stepwise Elimination to Scenario 1

# Backward elimination starting from model1; step() prints the AIC table for
# each step and the final fit is stored as model2.
model2 <- step(model1, direction = "backward")
## Start:  AIC=11926.91
## k.core ~ (depth + caliper + ind.deep + ind.med + gamma + phi.N + 
##     R.deep + R.med + SP + density.corr + density + phi.core + 
##     Facies) - Facies
## 
##                Df Sum of Sq        RSS   AIC
## - density.corr  1     19799 1675068713 11925
## - phi.N         1   3906205 1678955118 11927
## <none>                      1675048914 11927
## - SP            1  13394190 1688443104 11931
## - R.deep        1  28897686 1703946599 11939
## - caliper       1  29214826 1704263740 11939
## - depth         1  54372650 1729421563 11951
## - ind.deep      1  76022788 1751071701 11961
## - R.med         1  86603706 1761652619 11966
## - ind.med       1  98823752 1773872666 11972
## - density       1 106221406 1781270319 11975
## - phi.core      1 123125117 1798174031 11983
## - gamma         1 416312526 2091361440 12107
## 
## Step:  AIC=11924.92
## k.core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N + 
##     R.deep + R.med + SP + density + phi.core
## 
##            Df Sum of Sq        RSS   AIC
## <none>                  1675068713 11925
## - phi.N     1   4564880 1679633593 11925
## - SP        1  13491079 1688559792 11930
## - R.deep    1  28896144 1703964857 11937
## - caliper   1  29253869 1704322581 11937
## - depth     1  54825159 1729893872 11949
## - ind.deep  1  77573926 1752642639 11960
## - R.med     1  86772220 1761840933 11964
## - ind.med   1 100740701 1775809413 11971
## - density   1 114209586 1789278299 11977
## - phi.core  1 124694278 1799762991 11982
## - gamma     1 417015194 2092083907 12105
# Coefficient table and fit statistics of the model kept by the backward search.
summary(model2)
## 
## Call:
## lm(formula = k.core ~ depth + caliper + ind.deep + ind.med + 
##     gamma + phi.N + R.deep + R.med + SP + density + phi.core, 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5545.3  -753.4  -177.1   576.8 11260.2 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 60910.619  16525.937   3.686 0.000243 ***
## depth          -7.409      1.442  -5.139 3.46e-07 ***
## caliper     -3957.892   1054.270  -3.754 0.000186 ***
## ind.deep      -14.146      2.314  -6.113 1.52e-09 ***
## ind.med        17.263      2.478   6.967 6.74e-12 ***
## gamma         -77.461      5.465 -14.174  < 2e-16 ***
## phi.N       -1825.771   1231.150  -1.483 0.138470    
## R.deep        -25.972      6.961  -3.731 0.000204 ***
## R.med          63.466      9.816   6.466 1.75e-10 ***
## SP             -8.803      3.453  -2.549 0.010974 *  
## density      7980.761   1075.902   7.418 3.02e-13 ***
## phi.core    18343.648   2366.693   7.751 2.75e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1441 on 807 degrees of freedom
## Multiple R-squared:  0.5903, Adjusted R-squared:  0.5847 
## F-statistic: 105.7 on 11 and 807 DF,  p-value: < 2.2e-16
# Pull both R-squared values from a single summary() call.
fit_stats2 <- summary(model2)
r2_2 <- fit_stats2$r.squared
adj_r2_2 <- fit_stats2$adj.r.squared
cat("Scenario 2 - R2:", r2_2, "Adjusted R2:", adj_r2_2, "\n")
## Scenario 2 - R2: 0.5902797 Adjusted R2: 0.5846949
# predict() reads its prediction frame from `newdata`; an argument named
# `data` is silently ignored and the call only returns the fitted values.
k.pred2 <- predict(model2, newdata = data)
# Predicted versus measured permeability.
plot(k.pred2, data$k.core)

# Root-mean-square error of the predictions against the measured k.core.
rmse_2 <- RMSE(k.pred2, data$k.core)
rmse_2
## [1] 1430.126

Scenario 3: MLR of Permeability (k.core) using all well logs including Facies

# Least-squares fit of k.core on every other column of `data`, this time
# keeping Facies among the predictors.
model3 <- lm(k.core ~ ., data = data)
# Print the coefficient table and overall fit statistics.
summary(model3)
## 
## Call:
## lm(formula = k.core ~ ., data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5585.6  -568.9    49.2   476.5  8928.4 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.783e+04  1.760e+04  -3.853 0.000126 ***
## depth         8.544e+00  1.785e+00   4.786 2.02e-06 ***
## caliper       1.413e+03  1.019e+03   1.387 0.165789    
## ind.deep     -2.418e-01  2.354e+00  -0.103 0.918220    
## ind.med       1.224e+00  2.585e+00   0.473 0.636062    
## gamma        -4.583e+01  6.010e+00  -7.626 6.88e-14 ***
## phi.N        -2.010e+03  1.476e+03  -1.362 0.173540    
## R.deep       -2.344e+01  6.288e+00  -3.727 0.000207 ***
## R.med         5.643e+01  9.065e+00   6.225 7.76e-10 ***
## SP           -7.125e+00  3.145e+00  -2.266 0.023736 *  
## density.corr -2.567e+03  4.809e+03  -0.534 0.593602    
## density       2.319e+03  1.173e+03   1.976 0.048458 *  
## phi.core      1.921e+04  2.282e+03   8.418  < 2e-16 ***
## FaciesF10     8.921e+02  3.590e+02   2.485 0.013157 *  
## FaciesF2      9.243e+02  5.818e+02   1.589 0.112514    
## FaciesF3      4.393e+02  3.344e+02   1.313 0.189394    
## FaciesF5      7.411e+02  3.428e+02   2.162 0.030908 *  
## FaciesF7     -4.152e+01  5.742e+02  -0.072 0.942377    
## FaciesF8     -1.179e+03  3.927e+02  -3.002 0.002770 ** 
## FaciesF9     -2.969e+03  4.298e+02  -6.908 1.00e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1262 on 799 degrees of freedom
## Multiple R-squared:  0.6889, Adjusted R-squared:  0.6815 
## F-statistic: 93.12 on 19 and 799 DF,  p-value: < 2.2e-16
# Pull both R-squared values from a single summary() call.
fit_stats3 <- summary(model3)
r2_3 <- fit_stats3$r.squared
adj_r2_3 <- fit_stats3$adj.r.squared
cat("Scenario 3 - R2:", r2_3, "Adjusted R2:", adj_r2_3, "\n")
## Scenario 3 - R2: 0.6888892 Adjusted R2: 0.6814911
# predict() reads its prediction frame from `newdata`; an argument named
# `data` is silently ignored and the call only returns the fitted values.
k.pred3 <- predict(model3, newdata = data)
# Predicted versus measured permeability.
plot(k.pred3, data$k.core)

# Root-mean-square error of the predictions against the measured k.core.
rmse_3 <- RMSE(k.pred3, data$k.core)
rmse_3
## [1] 1246.201

Scenario 4: Apply Stepwise Elimination to Scenario 3

# Backward elimination starting from model3; step() prints the AIC table for
# each step and the final fit is stored as model4.
model4 <- step(model3, direction = "backward")
## Start:  AIC=11715.43
## k.core ~ depth + caliper + ind.deep + ind.med + gamma + phi.N + 
##     R.deep + R.med + SP + density.corr + density + phi.core + 
##     Facies
## 
##                Df Sum of Sq        RSS   AIC
## - ind.deep      1     16793 1271937992 11713
## - ind.med       1    356746 1272277945 11714
## - density.corr  1    453661 1272374861 11714
## - phi.N         1   2953609 1274874809 11715
## - caliper       1   3063007 1274984206 11715
## <none>                      1271921199 11715
## - density       1   6217927 1278139127 11717
## - SP            1   8171834 1280093033 11719
## - R.deep        1  22117394 1294038593 11728
## - depth         1  36466976 1308388176 11737
## - R.med         1  61690461 1333611660 11752
## - gamma         1  92579723 1364500923 11771
## - phi.core      1 112793101 1384714301 11783
## - Facies        7 403127714 1675048914 11927
## 
## Step:  AIC=11713.44
## k.core ~ depth + caliper + ind.med + gamma + phi.N + R.deep + 
##     R.med + SP + density.corr + density + phi.core + Facies
## 
##                Df Sum of Sq        RSS   AIC
## - density.corr  1    437546 1272375538 11712
## - phi.N         1   2938766 1274876758 11713
## - caliper       1   3074396 1275012389 11713
## <none>                      1271937992 11713
## - density       1   6228928 1278166920 11715
## - ind.med       1   6905855 1278843848 11716
## - SP            1   8191802 1280129794 11717
## - R.deep        1  22125695 1294063687 11726
## - depth         1  39139470 1311077462 11736
## - R.med         1  61773953 1333711946 11750
## - gamma         1  92865220 1364803212 11769
## - phi.core      1 112960440 1384898432 11781
## - Facies        7 479133709 1751071701 11961
## 
## Step:  AIC=11711.72
## k.core ~ depth + caliper + ind.med + gamma + phi.N + R.deep + 
##     R.med + SP + density + phi.core + Facies
## 
##            Df Sum of Sq        RSS   AIC
## - caliper   1   2980713 1275356252 11712
## <none>                  1272375538 11712
## - phi.N     1   3279032 1275654571 11712
## - density   1   5792837 1278168375 11713
## - ind.med   1   6813959 1279189497 11714
## - SP        1   8391302 1280766840 11715
## - R.deep    1  22009402 1294384940 11724
## - depth     1  38705776 1311081314 11734
## - R.med     1  61436819 1333812357 11748
## - gamma     1  93974329 1366349868 11768
## - phi.core  1 115336515 1387712053 11781
## - Facies    7 480267100 1752642639 11960
## 
## Step:  AIC=11711.64
## k.core ~ depth + ind.med + gamma + phi.N + R.deep + R.med + SP + 
##     density + phi.core + Facies
## 
##            Df Sum of Sq        RSS   AIC
## - phi.N     1   2534906 1277891157 11711
## <none>                  1275356252 11712
## - density   1   7270311 1282626562 11714
## - SP        1   8733336 1284089587 11715
## - ind.med   1  12924050 1288280301 11718
## - R.deep    1  22449117 1297805369 11724
## - depth     1  51507476 1326863728 11742
## - R.med     1  60137982 1335494234 11747
## - phi.core  1 112564835 1387921086 11779
## - gamma     1 141535555 1416891807 11796
## - Facies    7 520094756 1795451008 11978
## 
## Step:  AIC=11711.26
## k.core ~ depth + ind.med + gamma + R.deep + R.med + SP + density + 
##     phi.core + Facies
## 
##            Df Sum of Sq        RSS   AIC
## <none>                  1277891157 11711
## - density   1   5155969 1283047127 11713
## - SP        1   8515796 1286406953 11715
## - ind.med   1  10944937 1288836095 11716
## - R.deep    1  23273312 1301164469 11724
## - depth     1  49725248 1327616405 11740
## - R.med     1  59454645 1337345802 11746
## - phi.core  1 110154394 1388045551 11777
## - gamma     1 219059092 1496950249 11839
## - Facies    7 526383446 1804274603 11980
# Coefficient table and fit statistics of the model kept by the backward search.
summary(model4)
## 
## Call:
## lm(formula = k.core ~ depth + ind.med + gamma + R.deep + R.med + 
##     SP + density + phi.core + Facies, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5608.3  -567.8    35.9   500.7  8989.7 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.322e+04  6.625e+03  -6.523 1.22e-10 ***
## depth        6.648e+00  1.189e+00   5.590 3.11e-08 ***
## ind.med      1.078e+00  4.111e-01   2.623 0.008894 ** 
## gamma       -5.324e+01  4.537e+00 -11.733  < 2e-16 ***
## R.deep      -2.395e+01  6.264e+00  -3.824 0.000141 ***
## R.med        5.515e+01  9.022e+00   6.112 1.53e-09 ***
## SP          -7.214e+00  3.118e+00  -2.313 0.020960 *  
## density      1.880e+03  1.044e+03   1.800 0.072240 .  
## phi.core     1.817e+04  2.184e+03   8.320 3.77e-16 ***
## FaciesF10    8.266e+02  3.533e+02   2.340 0.019553 *  
## FaciesF2     7.035e+02  5.567e+02   1.264 0.206697    
## FaciesF3     4.100e+02  3.228e+02   1.270 0.204443    
## FaciesF5     5.913e+02  3.211e+02   1.841 0.065924 .  
## FaciesF7    -3.159e+02  5.402e+02  -0.585 0.558866    
## FaciesF8    -1.455e+03  3.122e+02  -4.661 3.69e-06 ***
## FaciesF9    -3.017e+03  3.764e+02  -8.017 3.82e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1262 on 803 degrees of freedom
## Multiple R-squared:  0.6874, Adjusted R-squared:  0.6816 
## F-statistic: 117.7 on 15 and 803 DF,  p-value: < 2.2e-16
# Pull both R-squared values from a single summary() call.
fit_stats4 <- summary(model4)
r2_4 <- fit_stats4$r.squared
adj_r2_4 <- fit_stats4$adj.r.squared
cat("Scenario 4 - R2:", r2_4, "Adjusted R2:", adj_r2_4, "\n")
## Scenario 4 - R2: 0.687429 Adjusted R2: 0.6815901
# predict() reads its prediction frame from `newdata`; an argument named
# `data` is silently ignored and the call only returns the fitted values.
k.pred4 <- predict(model4, newdata = data)
# Predicted versus measured permeability.
plot(k.pred4, data$k.core)

# Root-mean-square error of the predictions against the measured k.core.
rmse_4 <- RMSE(k.pred4, data$k.core)
rmse_4
## [1] 1249.122

Scenario 5: MLR of log10(Permeability) using all well logs including Facies

# log10() returns -Inf for zero and NaN for negative values, which would either
# abort the fit or silently drop rows; stop early with an explicit check.
stopifnot(all(data$k.core > 0, na.rm = TRUE))
# Add the log10-transformed permeability as a new column of `data`.
data$log10_k.core <- log10(data$k.core)
# Regress log10_k.core on every other column; k.core is removed from the
# predictors because the response is computed directly from it.
model5 <- lm(log10_k.core ~ . - k.core, data = data)
# Print the coefficient table and overall fit statistics.
summary(model5)
## 
## Call:
## lm(formula = log10_k.core ~ . - k.core, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5804 -0.1138  0.0322  0.1529  0.7384 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.3461877  4.6532000  -0.504  0.61425    
## depth         0.0007425  0.0004718   1.574  0.11596    
## caliper      -0.4605945  0.2693103  -1.710  0.08760 .  
## ind.deep     -0.0007951  0.0006222  -1.278  0.20168    
## ind.med       0.0007137  0.0006833   1.044  0.29659    
## gamma        -0.0091269  0.0015885  -5.746 1.30e-08 ***
## phi.N        -1.7628155  0.3901024  -4.519 7.16e-06 ***
## R.deep       -0.0025878  0.0016620  -1.557  0.11987    
## R.med         0.0044073  0.0023960   1.839  0.06622 .  
## SP           -0.0016935  0.0008312  -2.037  0.04194 *  
## density.corr  1.4462633  1.2712045   1.138  0.25558    
## density       1.6148374  0.3100921   5.208 2.44e-07 ***
## phi.core      9.4863406  0.6032903  15.724  < 2e-16 ***
## FaciesF10     0.0786460  0.0948909   0.829  0.40746    
## FaciesF2     -0.0184334  0.1537793  -0.120  0.90462    
## FaciesF3     -0.0307548  0.0883957  -0.348  0.72799    
## FaciesF5      0.1094193  0.0906034   1.208  0.22753    
## FaciesF7      0.2811620  0.1517797   1.852  0.06433 .  
## FaciesF8     -0.0976234  0.1038054  -0.940  0.34727    
## FaciesF9     -0.3562116  0.1135966  -3.136  0.00178 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3335 on 799 degrees of freedom
## Multiple R-squared:  0.6806, Adjusted R-squared:  0.673 
## F-statistic:  89.6 on 19 and 799 DF,  p-value: < 2.2e-16
# These R-squared values describe the fit on the log10 scale, so they are not
# directly comparable with the R-squared of the models fitted to raw k.core.
fit_stats5 <- summary(model5)
r2_5 <- fit_stats5$r.squared
adj_r2_5 <- fit_stats5$adj.r.squared
cat("Scenario 5 - R2:", r2_5, "Adjusted R2:", adj_r2_5, "\n")
## Scenario 5 - R2: 0.6805741 Adjusted R2: 0.6729783
# predict() reads its prediction frame from `newdata`; an argument named
# `data` is silently ignored and the call only returns the fitted values.
log_k.pred5 <- predict(model5, newdata = data)
# Undo the log10 transform so predictions are on the same scale as k.core.
k.pred5 <- 10^log_k.pred5
# Predicted versus measured permeability.
plot(k.pred5, data$k.core)

# RMSE is computed on the back-transformed predictions, i.e. in k.core units.
rmse_5 <- RMSE(k.pred5, data$k.core)
rmse_5
## [1] 1333.017

Scenario 6: Apply Stepwise Elimination to Scenario 5

# Backward elimination starting from model5; step() prints the AIC table for
# each step and the final fit is stored as model6.
model6 <- step(model5, direction = "backward")
## Start:  AIC=-1779.02
## log10_k.core ~ (depth + caliper + ind.deep + ind.med + gamma + 
##     phi.N + R.deep + R.med + SP + density.corr + density + phi.core + 
##     k.core + Facies) - k.core
## 
##                Df Sum of Sq     RSS     AIC
## - ind.med       1    0.1213  88.981 -1779.9
## - density.corr  1    0.1440  89.004 -1779.7
## - ind.deep      1    0.1816  89.042 -1779.3
## <none>                       88.860 -1779.0
## - R.deep        1    0.2696  89.130 -1778.5
## - depth         1    0.2754  89.135 -1778.5
## - caliper       1    0.3253  89.185 -1778.0
## - R.med         1    0.3763  89.236 -1777.6
## - SP            1    0.4617  89.322 -1776.8
## - phi.N         1    2.2710  91.131 -1760.3
## - density       1    3.0160  91.876 -1753.7
## - gamma         1    3.6713  92.531 -1747.9
## - Facies        7    7.0758  95.936 -1730.3
## - phi.core      1   27.4982 116.358 -1560.2
## 
## Step:  AIC=-1779.9
## log10_k.core ~ depth + caliper + ind.deep + gamma + phi.N + R.deep + 
##     R.med + SP + density.corr + density + phi.core + Facies
## 
##                Df Sum of Sq     RSS     AIC
## - density.corr  1    0.1931  89.174 -1780.1
## <none>                       88.981 -1779.9
## - ind.deep      1    0.2179  89.199 -1779.9
## - R.deep        1    0.2447  89.226 -1779.7
## - caliper       1    0.2921  89.273 -1779.2
## - R.med         1    0.3397  89.321 -1778.8
## - SP            1    0.4101  89.391 -1778.1
## - depth         1    0.4622  89.444 -1777.7
## - phi.N         1    2.2035  91.185 -1761.9
## - density       1    3.0113  91.993 -1754.6
## - gamma         1    3.5761  92.557 -1749.6
## - Facies        7    9.1242  98.106 -1714.0
## - phi.core      1   27.4190 116.400 -1561.9
## 
## Step:  AIC=-1780.12
## log10_k.core ~ depth + caliper + ind.deep + gamma + phi.N + R.deep + 
##     R.med + SP + density + phi.core + Facies
## 
##            Df Sum of Sq     RSS     AIC
## - ind.deep  1    0.2180  89.392 -1780.1
## <none>                   89.174 -1780.1
## - R.deep    1    0.2526  89.427 -1779.8
## - caliper   1    0.2676  89.442 -1779.7
## - R.med     1    0.3598  89.534 -1778.8
## - SP        1    0.3832  89.558 -1778.6
## - depth     1    0.5404  89.715 -1777.2
## - phi.N     1    2.0726  91.247 -1763.3
## - gamma     1    3.4838  92.658 -1750.7
## - density   1    3.6220  92.796 -1749.5
## - Facies    7    9.3567  98.531 -1712.4
## - phi.core  1   27.2273 116.402 -1563.9
## 
## Step:  AIC=-1780.12
## log10_k.core ~ depth + caliper + gamma + phi.N + R.deep + R.med + 
##     SP + density + phi.core + Facies
## 
##            Df Sum of Sq     RSS     AIC
## <none>                   89.392 -1780.1
## - R.deep    1    0.2869  89.679 -1779.5
## - depth     1    0.3332  89.726 -1779.1
## - SP        1    0.4296  89.822 -1778.2
## - R.med     1    0.5085  89.901 -1777.5
## - caliper   1    0.5746  89.967 -1776.9
## - phi.N     1    2.3337  91.726 -1761.0
## - gamma     1    3.8214  93.214 -1747.8
## - density   1    3.8626  93.255 -1747.5
## - Facies    7    9.2100  98.602 -1713.8
## - phi.core  1   27.0935 116.486 -1565.3
# Coefficient table and fit statistics of the model kept by the backward search.
summary(model6)
## 
## Call:
## lm(formula = log10_k.core ~ depth + caliper + gamma + phi.N + 
##     R.deep + R.med + SP + density + phi.core + Facies, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.58182 -0.12001  0.03437  0.15230  0.70317 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.2671250  3.9316796  -0.322  0.74732    
## depth        0.0006562  0.0003796   1.729  0.08420 .  
## caliper     -0.5608681  0.2470292  -2.270  0.02344 *  
## gamma       -0.0091497  0.0015626  -5.855 6.94e-09 ***
## phi.N       -1.7463527  0.3816550  -4.576 5.50e-06 ***
## R.deep      -0.0026554  0.0016551  -1.604  0.10903    
## R.med        0.0049837  0.0023334   2.136  0.03300 *  
## SP          -0.0016140  0.0008221  -1.963  0.04996 *  
## density      1.7602255  0.2990153   5.887 5.79e-09 ***
## phi.core     9.2753944  0.5949259  15.591  < 2e-16 ***
## FaciesF10    0.0896953  0.0945929   0.948  0.34330    
## FaciesF2     0.0152576  0.1523676   0.100  0.92026    
## FaciesF3    -0.0292379  0.0869197  -0.336  0.73667    
## FaciesF5     0.1022238  0.0879087   1.163  0.24524    
## FaciesF7     0.2794793  0.1462763   1.911  0.05641 .  
## FaciesF8    -0.0932936  0.0927473  -1.006  0.31477    
## FaciesF9    -0.3877078  0.1030388  -3.763  0.00018 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3339 on 802 degrees of freedom
## Multiple R-squared:  0.6787, Adjusted R-squared:  0.6722 
## F-statistic: 105.9 on 16 and 802 DF,  p-value: < 2.2e-16
# These R-squared values describe the fit on the log10 scale, so they are not
# directly comparable with the R-squared of the models fitted to raw k.core.
fit_stats6 <- summary(model6)
r2_6 <- fit_stats6$r.squared
adj_r2_6 <- fit_stats6$adj.r.squared
cat("Scenario 6 - R2:", r2_6, "Adjusted R2:", adj_r2_6, "\n")
## Scenario 6 - R2: 0.6786603 Adjusted R2: 0.6722495
# predict() reads its prediction frame from `newdata`; an argument named
# `data` is silently ignored and the call only returns the fitted values.
log_k.pred6 <- predict(model6, newdata = data)
# Undo the log10 transform so predictions are on the same scale as k.core.
k.pred6 <- 10^log_k.pred6
# Predicted versus measured permeability.
plot(k.pred6, data$k.core)

# RMSE is computed on the back-transformed predictions, i.e. in k.core units.
rmse_6 <- RMSE(k.pred6, data$k.core)
rmse_6
## [1] 1330.932

Scenario 7: Random Subsampling Validation (single 88% training / 12% testing hold-out split)

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Fix the RNG state so the split is reproducible.
set.seed(12345)
# Split by row index so the test set is the exact complement of the training
# set. anti_join() matches on the values of every column, so any rows that are
# duplicated in `data` would vanish from the test set as soon as one copy was
# drawn for training (and the join also emits a "Joining with" message).
# NOTE(review): sample.int() with this seed should draw the same rows that
# sample_frac(data, .88) did - confirm against the installed dplyr version.
n_obs <- nrow(data)
train_idx <- sample.int(n_obs, size = round(n_obs * 0.88))
in_training <- seq_len(n_obs) %in% train_idx
# 88% of the rows, in sampled order, are used for fitting.
training <- data[train_idx, ]
# Every remaining row, in original order, is held out for evaluation.
testing <- data[!in_training, ]

Train a model on log10 permeability, predict on the test set, transform predictions, and plot results

# Fit the log10 permeability model on the training subset only; k.core is
# removed from the predictors because log10_k.core is computed from it.
model7 <- lm(log10_k.core ~ . - k.core, data = training)
# Print the coefficient table and overall fit statistics (training rows only).
summary(model7)
## 
## Call:
## lm(formula = log10_k.core ~ . - k.core, data = training)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.55755 -0.12122  0.02893  0.15042  0.79949 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.8586939  5.0517207  -0.566  0.57165    
## depth         0.0007538  0.0005138   1.467  0.14278    
## caliper      -0.4121456  0.2936450  -1.404  0.16090    
## ind.deep     -0.0008961  0.0006731  -1.331  0.18352    
## ind.med       0.0008326  0.0007382   1.128  0.25975    
## gamma        -0.0081585  0.0017343  -4.704 3.07e-06 ***
## phi.N        -1.8223607  0.4249430  -4.288 2.05e-05 ***
## R.deep       -0.0026448  0.0018441  -1.434  0.15196    
## R.med         0.0044863  0.0026369   1.701  0.08932 .  
## SP           -0.0023411  0.0009021  -2.595  0.00966 ** 
## density.corr  1.8245774  1.3389601   1.363  0.17342    
## density       1.5558045  0.3357715   4.634 4.29e-06 ***
## phi.core      9.7155924  0.6602205  14.716  < 2e-16 ***
## FaciesF10     0.0969229  0.1028834   0.942  0.34648    
## FaciesF2      0.0119197  0.1610035   0.074  0.94100    
## FaciesF3     -0.0435836  0.0973739  -0.448  0.65459    
## FaciesF5      0.1736229  0.0986435   1.760  0.07883 .  
## FaciesF7      0.3530026  0.1596051   2.212  0.02731 *  
## FaciesF8     -0.0608978  0.1128519  -0.540  0.58963    
## FaciesF9     -0.3206674  0.1247754  -2.570  0.01038 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.338 on 701 degrees of freedom
## Multiple R-squared:  0.6769, Adjusted R-squared:  0.6682 
## F-statistic: 77.31 on 19 and 701 DF,  p-value: < 2.2e-16
# Fit statistics of model7; these come from the training rows and are on the
# log10 scale.
r2_7 <- summary(model7)[["r.squared"]]
adj_r2_7 <- summary(model7)[["adj.r.squared"]]
cat("Scenario 7 - R2:", r2_7, "Adjusted R2:", adj_r2_7, "\n")
## Scenario 7 - R2: 0.6769306 Adjusted R2: 0.6681741
# Predict log10 permeability for the held-out rows, then undo the transform so
# the predictions are on the same scale as k.core.
log_k.pred7 <- predict(object = model7, newdata = testing)
k.pred7 <- 10^log_k.pred7
# Predicted versus measured permeability on the test set.
plot(x = k.pred7, y = testing$k.core)

# Test-set RMSE on the back-transformed predictions.
rmse_7 <- RMSE(pred = k.pred7, obs = testing$k.core)
print(rmse_7)
## [1] 1417.012

Scenario 8: Apply Stepwise Elimination to Scenario 7

# Backward elimination starting from model7 (fitted on `training`); step()
# prints the AIC table for each step and the final fit is stored as model8.
model8 <- step(model7, direction = "backward")
## Start:  AIC=-1544.47
## log10_k.core ~ (depth + caliper + ind.deep + ind.med + gamma + 
##     phi.N + R.deep + R.med + SP + density.corr + density + phi.core + 
##     k.core + Facies) - k.core
## 
##                Df Sum of Sq     RSS     AIC
## - ind.med       1    0.1453  80.227 -1545.2
## - ind.deep      1    0.2025  80.284 -1544.7
## - density.corr  1    0.2121  80.293 -1544.6
## <none>                       80.081 -1544.5
## - caliper       1    0.2250  80.306 -1544.4
## - R.deep        1    0.2350  80.316 -1544.4
## - depth         1    0.2459  80.327 -1544.3
## - R.med         1    0.3307  80.412 -1543.5
## - SP            1    0.7693  80.851 -1539.6
## - phi.N         1    2.1010  82.182 -1527.8
## - density       1    2.4526  82.534 -1524.7
## - gamma         1    2.5281  82.609 -1524.1
## - Facies        7    7.0416  87.123 -1497.7
## - phi.core      1   24.7385 104.820 -1352.4
## 
## Step:  AIC=-1545.16
## log10_k.core ~ depth + caliper + ind.deep + gamma + phi.N + R.deep + 
##     R.med + SP + density.corr + density + phi.core + Facies
## 
##                Df Sum of Sq     RSS     AIC
## - ind.deep      1    0.1771  80.404 -1545.6
## - caliper       1    0.1847  80.411 -1545.5
## - R.deep        1    0.2083  80.435 -1545.3
## <none>                       80.227 -1545.2
## - density.corr  1    0.2797  80.506 -1544.7
## - R.med         1    0.2930  80.520 -1544.5
## - depth         1    0.4462  80.673 -1543.2
## - SP            1    0.7063  80.933 -1540.8
## - phi.N         1    2.0444  82.271 -1529.0
## - gamma         1    2.4208  82.647 -1525.7
## - density       1    2.4560  82.683 -1525.4
## - Facies        7    9.0431  89.270 -1482.2
## - phi.core      1   24.6543 104.881 -1354.0
## 
## Step:  AIC=-1545.57
## log10_k.core ~ depth + caliper + gamma + phi.N + R.deep + R.med + 
##     SP + density.corr + density + phi.core + Facies
## 
##                Df Sum of Sq     RSS     AIC
## <none>                       80.404 -1545.6
## - R.deep        1    0.2391  80.643 -1545.4
## - depth         1    0.2791  80.683 -1545.1
## - density.corr  1    0.2797  80.683 -1545.1
## - caliper       1    0.4071  80.811 -1543.9
## - R.med         1    0.4138  80.817 -1543.9
## - SP            1    0.7809  81.185 -1540.6
## - phi.N         1    2.3020  82.706 -1527.2
## - density       1    2.6287  83.032 -1524.4
## - gamma         1    2.6615  83.065 -1524.1
## - Facies        7    8.9227  89.326 -1483.7
## - phi.core      1   24.5255 104.929 -1355.6
# Coefficient table and fit statistics of the model kept by the backward search.
summary(model8)
## 
## Call:
## lm(formula = log10_k.core ~ depth + caliper + gamma + phi.N + 
##     R.deep + R.med + SP + density.corr + density + phi.core + 
##     Facies, data = training)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.54574 -0.11773  0.03007  0.14738  0.77906 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.4946836  4.3071924  -0.347  0.72868    
## depth         0.0006531  0.0004181   1.562  0.11868    
## caliper      -0.5084994  0.2695412  -1.887  0.05963 .  
## gamma        -0.0082296  0.0017060  -4.824 1.73e-06 ***
## phi.N        -1.8795801  0.4189558  -4.486 8.46e-06 ***
## R.deep       -0.0026524  0.0018344  -1.446  0.14865    
## R.med         0.0048859  0.0025687   1.902  0.05756 .  
## SP           -0.0023342  0.0008933  -2.613  0.00917 ** 
## density.corr  2.0677445  1.3222686   1.564  0.11832    
## density       1.6014807  0.3340470   4.794 1.99e-06 ***
## phi.core      9.5898094  0.6548790  14.644  < 2e-16 ***
## FaciesF10     0.1023464  0.1024920   0.999  0.31834    
## FaciesF2      0.0206205  0.1604489   0.129  0.89778    
## FaciesF3     -0.0488202  0.0956759  -0.510  0.61002    
## FaciesF5      0.1589526  0.0960597   1.655  0.09843 .  
## FaciesF7      0.3474071  0.1534822   2.264  0.02391 *  
## FaciesF8     -0.0725270  0.1017463  -0.713  0.47619    
## FaciesF9     -0.3600590  0.1133628  -3.176  0.00156 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3382 on 703 degrees of freedom
## Multiple R-squared:  0.6756, Adjusted R-squared:  0.6678 
## F-statistic: 86.13 on 17 and 703 DF,  p-value: < 2.2e-16
# Fit statistics of model8; these come from the training rows and are on the
# log10 scale.
r2_8 <- summary(model8)[["r.squared"]]
adj_r2_8 <- summary(model8)[["adj.r.squared"]]
cat("Scenario 8 - R2:", r2_8, "Adjusted R2:", adj_r2_8, "\n")
## Scenario 8 - R2: 0.6756298 Adjusted R2: 0.6677858
# Predict log10 permeability for the held-out rows, then undo the transform so
# the predictions are on the same scale as k.core.
log_k.pred8 <- predict(object = model8, newdata = testing)
k.pred8 <- 10^log_k.pred8
# Predicted versus measured permeability on the test set.
plot(x = k.pred8, y = testing$k.core)

# Test-set RMSE on the back-transformed predictions.
rmse_8 <- RMSE(pred = k.pred8, obs = testing$k.core)
print(rmse_8)
## [1] 1440.627