Data Processing

The data set is:

# Read the Karpur CSV into a data frame and show its first six rows.
data <- read.csv(file = "C:/Users/msi/Desktop/Rdata/karpur.csv")
head(data, n = 6L)
##    depth caliper ind.deep ind.med  gamma phi.N R.deep  R.med      SP
## 1 5667.0   8.685  618.005 569.781 98.823 0.410  1.618  1.755 -56.587
## 2 5667.5   8.686  497.547 419.494 90.640 0.307  2.010  2.384 -61.916
## 3 5668.0   8.686  384.935 300.155 78.087 0.203  2.598  3.332 -55.861
## 4 5668.5   8.686  278.324 205.224 66.232 0.119  3.593  4.873 -41.860
## 5 5669.0   8.686  183.743 131.155 59.807 0.069  5.442  7.625 -34.934
## 6 5669.5   8.686  109.512  75.633 57.109 0.048  9.131 13.222 -39.769
##   density.corr density phi.core   k.core Facies
## 1       -0.033   2.205  33.9000 2442.590     F1
## 2       -0.067   2.040  33.4131 3006.989     F1
## 3       -0.064   1.888  33.1000 3370.000     F1
## 4       -0.053   1.794  34.9000 2270.000     F1
## 5       -0.054   1.758  35.0644 2530.758     F1
## 6       -0.058   1.759  35.3152 2928.314     F1
# Boxplots of the unfiltered k.core, phi.core and phi.N columns, drawn
# side by side (one row, three panels) to expose outliers.
par(mfrow = c(1, 3))
boxplot(data[["k.core"]],   col = "red",    xlab = "Kcore",   cex = 1.5)
boxplot(data[["phi.core"]], col = "green",  xlab = "PHIcore", cex = 1.5)
boxplot(data[["phi.N"]],    col = "yellow", xlab = "PHlog",   cex = 1.5)

# Outlier removal with 1.5 * IQR fences on k.core and phi.N.
#
# Both pairs of fences are computed from the unfiltered `data`. Rows are then
# dropped in two passes: first on k.core (-> sub1), then on phi.N (-> new_data).
# na.rm is left FALSE on purpose: a missing value in either column stops the
# script here instead of silently shifting the fences.
quartiles1 <- quantile(data$k.core, probs = c(0.25, 0.75), na.rm = FALSE)
quartiles2 <- quantile(data$phi.N, probs = c(0.25, 0.75), na.rm = FALSE)

IQR1 <- IQR(data$k.core)
IQR2 <- IQR(data$phi.N)

# Lower / upper fences: Q1 - 1.5 * IQR and Q3 + 1.5 * IQR.
Lower1 <- quartiles1[1] - 1.5 * IQR1
Upper1 <- quartiles1[2] + 1.5 * IQR1
Lower2 <- quartiles2[1] - 1.5 * IQR2
Upper2 <- quartiles2[2] + 1.5 * IQR2

# Pass 1: keep rows whose k.core lies strictly inside its fences.
sub1 <- subset(data, k.core > Lower1 & k.core < Upper1)

# Pass 2: keep rows whose phi.N lies strictly inside its fences.
# The condition must be evaluated on sub1's own phi.N column. Building the mask
# from data$phi.N (the unfiltered frame) gives a logical vector longer than
# nrow(sub1) and misaligned with it, which keeps the wrong rows and appends
# all-NA rows for every TRUE past the end of sub1.
# NOTE(review): output recorded further down with the old filter (e.g. the
# "observations deleted due to missingness" counts) should be regenerated.
new_data <- subset(sub1, phi.N > Lower2 & phi.N < Upper2)
# Same three boxplots as for the raw data, now drawn from the outlier-filtered
# frame `new_data` (one row, three panels).
par(mfrow = c(1, 3))
boxplot(new_data[["k.core"]],   col = "red",    xlab = "Kcore",   cex = 1.5)
boxplot(new_data[["phi.core"]], col = "green",  xlab = "PHIcore", cex = 1.5)
boxplot(new_data[["phi.N"]],    col = "yellow", xlab = "PHlog",   cex = 1.5)

# Pull the three working vectors out of the filtered frame.
# phi.core is divided by 100 -- presumably a percent-to-fraction conversion so
# that it is on the same scale as phi.N (confirm against the data dictionary).
Kcore <- new_data[["k.core"]]
PHIcore <- new_data[["phi.core"]] / 100
PHIlog <- new_data[["phi.N"]]

# Figure 1 (1 x 2): histograms of the untransformed permeability and porosity.
par(mfrow = c(1, 2))
hist(Kcore,   xlab = "non-normlized K core",   main = "")
hist(PHIcore, xlab = "non-normlized PHI core", main = "")

# Figure 2 (2 x 2): top row repeats the untransformed histograms (yellow);
# bottom row shows the "normalized" versions (green). Only Kcore is actually
# transformed (square root); PHIcore is plotted unchanged in both rows.
par(mfrow = c(2, 2))
hist(Kcore,   xlab = "non-normlized K core",   main = "", col = "yellow")
hist(PHIcore, xlab = "non-normlized PHI core", main = "", col = "yellow")

hist(sqrt(Kcore), xlab = "normlized K core",   main = "", col = "green")
hist(PHIcore,     xlab = "normlized PHI core", main = "", col = "green")

# Square-root transform of core permeability, used as the response of the
# second regression further down.
KcoreNorm <- sqrt(new_data[["k.core"]])

# First regression: core porosity as a linear function of log porosity.
# At this point PhiCoreCorrected holds the fitted lm object.
PhiCoreCorrected <- lm(formula = PHIcore ~ PHIlog)
summary(PhiCoreCorrected)
## 
## Call:
## lm(formula = PHIcore ~ PHIlog)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.103280 -0.040444  0.009061  0.039229  0.093787 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.27170    0.01029  26.406   <2e-16 ***
## PHIlog      -0.04495    0.04096  -1.097    0.273    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04538 on 642 degrees of freedom
##   (47 observations deleted due to missingness)
## Multiple R-squared:  0.001872,   Adjusted R-squared:  0.0003175 
## F-statistic: 1.204 on 1 and 642 DF,  p-value: 0.2729
# Scatter of core porosity against log porosity on a single panel, with the
# fitted regression line (PhiCoreCorrected is still the lm object here).
par(mfrow = c(1, 1))
plot(PHIlog, PHIcore, col = "#001c49", pch = 15, cex = 0.5)
# lwd is documented as a positive number; pass 2 rather than the string "2"
# so the call does not depend on implicit character-to-numeric coercion.
abline(PhiCoreCorrected, col = "red", lwd = 2)

# Replace the porosity model object with its predictions: from here on
# PhiCoreCorrected is a numeric vector of fitted core porosities, one per
# PHIlog value.
model_input <- data.frame(PHIlog = PHIlog)
PhiCoreCorrected <- predict(PhiCoreCorrected, newdata = model_input)

# Second regression: sqrt-permeability on the predicted core porosity.
# KcoreCorrected holds the fitted lm object at this point.
KcoreCorrected <- lm(formula = KcoreNorm ~ PhiCoreCorrected)
summary(KcoreCorrected)
## 
## Call:
## lm(formula = KcoreNorm ~ PhiCoreCorrected)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.474 -13.311  -2.836  15.323  44.902 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -530.64      96.77  -5.484 6.00e-08 ***
## PhiCoreCorrected  2177.82     371.35   5.865 7.21e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.49 on 642 degrees of freedom
##   (47 observations deleted due to missingness)
## Multiple R-squared:  0.05085,    Adjusted R-squared:  0.04937 
## F-statistic: 34.39 on 1 and 642 DF,  p-value: 7.208e-09
# Scatter of sqrt-permeability against predicted core porosity, with the
# second regression line (KcoreCorrected is still the lm object here).
plot(PhiCoreCorrected, KcoreNorm, col = "#001c49", pch = 15, cex = 0.5)
# lwd is documented as a positive number; pass 2 rather than the string "2"
# so the call does not depend on implicit character-to-numeric coercion.
abline(KcoreCorrected, col = "red", lwd = 2)

# Replace the permeability model object with its predictions (still on the
# square-root scale at this point).
input_model2 <- data.frame(PhiCoreCorrected = PhiCoreCorrected)
KcoreCorrected <- predict(KcoreCorrected, newdata = input_model2)

# Undo the square-root transform on both the observed and predicted values.
# Note that KcoreNorm keeps its name but now holds back-transformed values.
KcoreNorm <- KcoreNorm^2
KcoreCorrected <- KcoreCorrected^2

# Assemble observed and predicted permeability / porosity side by side and
# preview the first six rows.
final_data <- data.frame(
  KcoreNorm, KcoreCorrected, PHIcore, PhiCoreCorrected, PHIlog
)
head(final_data, n = 6L)
##   KcoreNorm KcoreCorrected  PHIcore PhiCoreCorrected PHIlog
## 1  3006.989        961.924 0.334131        0.2578982  0.307
## 2  3370.000       1697.016 0.331000        0.2625726  0.203
## 3  2270.000       2442.051 0.349000        0.2663481  0.119
## 4  3000.000       2413.114 0.360000        0.2662132  0.122
## 5  3066.865       2413.114 0.346627        0.2662132  0.122
## 6  3110.000       2500.442 0.338000        0.2666177  0.113