1. Data Preparation

2. Creating linear regression model

3. Data visualization

### Load the Karpur dataset
# NOTE(review): the absolute path below ties the script to one machine;
# "karpur.csv" is resolved relative to this working directory.
setwd("/Users/khalilahmed/Desktop/User1/Stage 4/RC/Rstudio files")
kdata <- read.csv(file = "karpur.csv", header = TRUE)

## phi.core in karpur.csv is stored as a percentage, so it was converted to a fraction in Excel and saved under the column name `phi.core.` (note the trailing dot)
# Quick look at the first rows and the dimensions of the data.
head(kdata)
dim(kdata)

# Histograms showing the range and frequency distribution of log porosity,
# core porosity and core permeability, drawn side by side.
par(mfrow = c(1, 3))
hist_specs <- list(
  list(column = "phi.N", label = "log porosity", fill = "red"),
  list(column = "phi.core.", label = "core porosity", fill = "green"),
  list(column = "k.core", label = "core permeability", fill = "yellow")
)
for (spec in hist_specs) {
  hist(kdata[[spec$column]], xlab = spec$label, main = "", col = spec$fill)
}

2. Creating linear regression model

## Simple linear regression: core porosity as a function of log porosity.
slr <- lm(formula = phi.core. ~ phi.N, data = kdata)

# Scatter plot of the two porosity measurements.
with(
  kdata,
  plot(phi.N, phi.core., xlab = "log porosity", ylab = "core porosity")
)

coef(slr)
## (Intercept)       phi.N 
##   0.3096232  -0.1820696

# Red: fitted regression line. Green: 1:1 reference line (intercept 0, slope 1).
abline(slr, col = "red", lwd = 2)
abline(a = 0, b = 1, col = "green")

## Result: many outliers lie far from the fitted straight line.
summary(slr)
## 
## Call:
## lm(formula = phi.core. ~ phi.N, data = kdata)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.135237 -0.030779  0.009432  0.033563  0.104025 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.30962    0.00485  63.846   <2e-16 ***
## phi.N       -0.18207    0.02080  -8.753   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04368 on 817 degrees of freedom
## Multiple R-squared:  0.08573,    Adjusted R-squared:  0.08462 
## F-statistic: 76.61 on 1 and 817 DF,  p-value: < 2.2e-16
## The simple linear regression model has a low adjusted R-squared (0.08462),
## so log porosity alone explains very little of the variation in core porosity.

# Multiple linear regression: core porosity as a function of log porosity and
# facies, used to compute core porosity corrected to the log scale.
# Core porosity is used as-is (no normalization is applied).
# The "- 1" term removes the intercept, so every facies level gets its own
# coefficient.
phicorelog <- lm(formula = phi.core. ~ phi.N + Facies - 1, data = kdata)
summary(phicorelog)
## 
## Call:
## lm(formula = phi.core. ~ phi.N + Facies - 1, data = kdata)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.103530 -0.011573 -0.000206  0.010463  0.102852 
## 
## Coefficients:
##           Estimate Std. Error t value Pr(>|t|)    
## phi.N     0.013364   0.018060    0.74     0.46    
## FaciesF1  0.314805   0.002777  113.37   <2e-16 ***
## FaciesF10 0.207680   0.005072   40.95   <2e-16 ***
## FaciesF2  0.175233   0.009390   18.66   <2e-16 ***
## FaciesF3  0.231939   0.004955   46.81   <2e-16 ***
## FaciesF5  0.272953   0.003914   69.74   <2e-16 ***
## FaciesF7  0.225164   0.008730   25.79   <2e-16 ***
## FaciesF8  0.305884   0.005019   60.94   <2e-16 ***
## FaciesF9  0.264448   0.004825   54.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02326 on 810 degrees of freedom
## Multiple R-squared:  0.9928, Adjusted R-squared:  0.9928 
## F-statistic: 1.246e+04 on 9 and 810 DF,  p-value: < 2.2e-16
# The adjusted R-squared is high (0.9928) and the residual standard error drops
# from 0.04368 to 0.02326, so adding facies gives a much better linear fit.
# NOTE: this model has no intercept, so R computes R-squared about zero rather
# than about the mean of the response; the value is therefore not directly
# comparable with the R-squared of the simple model above.

# Core porosity corrected to the log scale (fitted values of the model).
pphicorelog <- predict(phicorelog, newdata = kdata)

# Side-by-side comparison of measured core porosity, log porosity and the
# corrected porosity. The third column must be the prediction vector
# `pphicorelog`; binding the model object `phicorelog` itself would produce a
# recycled list-matrix instead of a numeric table.
cbind(
  "phi Core" = kdata$phi.core.,
  "phi log" = kdata$phi.N,
  "phi core corrected" = pphicorelog
)
# Multiple linear regression of core permeability on the corrected core
# porosity (from the porosity/facies model) and facies, used to compute core
# permeability corrected to the log scale.
# Here permeability (K) is predicted on its raw scale, without normalization.
# `pphicorelog` is a free-standing vector, not a column of kdata, so lm() picks
# it up from the calling environment.
outnk <- lm(formula = kdata$k.core ~ pphicorelog + Facies - 1, data = kdata)
summary(outnk)
## 
## Call:
## lm(formula = kdata$k.core ~ pphicorelog + Facies - 1, data = kdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5613.4  -596.9  -130.3   475.0 10449.1 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## pphicorelog  -412352      89814  -4.591 5.11e-06 ***
## FaciesF1      132659      28386   4.673 3.47e-06 ***
## FaciesF10      87869      18968   4.632 4.21e-06 ***
## FaciesF2       73980      16049   4.610 4.69e-06 ***
## FaciesF3       97910      21087   4.643 4.00e-06 ***
## FaciesF5      118916      24729   4.809 1.81e-06 ***
## FaciesF7       95868      20496   4.677 3.40e-06 ***
## FaciesF8      130990      27786   4.714 2.86e-06 ***
## FaciesF9      111324      24050   4.629 4.28e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1546 on 810 degrees of freedom
## Multiple R-squared:  0.7652, Adjusted R-squared:  0.7626 
## F-statistic: 293.2 on 9 and 810 DF,  p-value: < 2.2e-16
# Permeability predicted by the raw-scale (non-normalized) model.
poutnk <- predict(outnk, newdata = kdata)

# Normalize core permeability with a base-10 log transform.
nkcore <- with(kdata, log10(k.core))

# Compare the distributions before and after the transform.
par(mfrow = c(1, 2))
hist(kdata$k.core, xlab = "core permeability", main = "")
hist(nkcore, xlab = "normalized core permeability", main = "")

# Same predictors as the raw-scale model, fitted to log10 permeability.
kcorelog <- lm(formula = nkcore ~ pphicorelog + Facies - 1, data = kdata)
summary(kcorelog)
## 
## Call:
## lm(formula = nkcore ~ pphicorelog + Facies - 1, data = kdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.92979 -0.16693  0.02686  0.22160  0.90566 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## pphicorelog -126.548     23.804  -5.316 1.37e-07 ***
## FaciesF1      43.350      7.523   5.762 1.18e-08 ***
## FaciesF10     29.296      5.027   5.827 8.13e-09 ***
## FaciesF2      24.772      4.254   5.824 8.30e-09 ***
## FaciesF3      32.392      5.589   5.796 9.74e-09 ***
## FaciesF5      38.460      6.554   5.868 6.43e-09 ***
## FaciesF7      32.046      5.432   5.899 5.36e-09 ***
## FaciesF8      42.638      7.364   5.790 1.01e-08 ***
## FaciesF9      36.758      6.374   5.767 1.15e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4097 on 810 degrees of freedom
## Multiple R-squared:  0.9833, Adjusted R-squared:  0.9831 
## F-statistic:  5289 on 9 and 810 DF,  p-value: < 2.2e-16
# The adjusted R-squared is high (0.9831), which indicates a strong linear
# relationship between log10 permeability and the predictors.
pkcorelog <- predict(kcorelog, newdata = kdata)

# Back-transform the log10 predictions to the original permeability scale.
epkcorelog <- 10^pkcorelog

3. Data visualization

## Depth profiles of the measured and corrected properties.
# With a 1 x 2 layout the six figures below are spread over three pages.
par(mfrow = c(1, 2))

# Depth axis limits, given in reverse so that depth increases downwards.
depth_axis <- c(6083, 5667)

# Draw one property as a line against depth.
draw_depth_track <- function(values, label, colour, value_range) {
  plot(values, kdata$depth, xlab = label, ylab = "depth", col = colour,
       xlim = value_range, ylim = depth_axis, type = "l")
}

draw_depth_track(kdata$phi.core., "core porosity", "blue",
                 c(0.1570, 0.3630))

draw_depth_track(kdata$phi.N, "log porosity", "red",
                 c(0.0150, 0.4100))

draw_depth_track(kdata$k.core, "core permeability", "brown",
                 c(0.42, 15600.00))

draw_depth_track(pphicorelog, "corrected porosity", "green",
                 c(0.1775, 0.3203))

draw_depth_track(epkcorelog, " corrected permeability", "violet",
                 c(130.1, 7815.2))

# Depth distribution of each facies, on the same reversed depth axis.
boxplot(depth ~ Facies, data = kdata, ylim = depth_axis)

## Overlay observed and predicted permeability against depth in one graph.
par(mfrow = c(1, 1))

# Shared axis limits; the depth limits are reversed so depth increases downwards.
depth_limits <- c(6083, 5667)
perm_limits <- c(0, 15600)

# Observed core permeability sets up the axes (dotted red line).
plot(x = kdata$k.core, y = kdata$depth, type = "l",
     col = "red", lwd = 5, lty = 3,
     xlim = perm_limits, ylim = depth_limits,
     xlab = "Permeability", ylab = "Depth,m")

# Predictions are added with lines() so the axes and box are drawn only once
# and the limits do not have to be repeated for every curve.
lines(x = poutnk, y = kdata$depth, col = "green", lwd = 4, lty = 1)
lines(x = epkcorelog, y = kdata$depth, col = "darkgreen", lwd = 4, lty = 1)

grid()
legend("topright",
       legend = c("Observed K", "Predicted K", "Predicted Normalize K"),
       lty = c(3, 1, 1), col = c("red", "green", "darkgreen"))

# Load xtable with library(): it stops with an error if the package is not
# installed, whereas require() only warns and returns FALSE, letting the script
# continue without the dependency.
library(xtable)

# Per-column summary statistics of the dataset.
summary(kdata)
##      depth         caliper         ind.deep          ind.med       
##  Min.   :5667   Min.   :8.487   Min.   :  6.532   Min.   :  9.386  
##  1st Qu.:5769   1st Qu.:8.556   1st Qu.: 28.799   1st Qu.: 27.892  
##  Median :5872   Median :8.588   Median :217.849   Median :254.383  
##  Mean   :5873   Mean   :8.622   Mean   :275.357   Mean   :273.357  
##  3rd Qu.:5977   3rd Qu.:8.686   3rd Qu.:566.793   3rd Qu.:544.232  
##  Max.   :6083   Max.   :8.886   Max.   :769.484   Max.   :746.028  
##      gamma            phi.N            R.deep            R.med        
##  Min.   : 16.74   Min.   :0.0150   Min.   :  1.300   Min.   :  1.340  
##  1st Qu.: 40.89   1st Qu.:0.2030   1st Qu.:  1.764   1st Qu.:  1.837  
##  Median : 51.37   Median :0.2450   Median :  4.590   Median :  3.931  
##  Mean   : 53.42   Mean   :0.2213   Mean   : 24.501   Mean   : 21.196  
##  3rd Qu.: 62.37   3rd Qu.:0.2640   3rd Qu.: 34.724   3rd Qu.: 35.853  
##  Max.   :112.40   Max.   :0.4100   Max.   :153.085   Max.   :106.542  
##        SP          density.corr          density         phi.core    
##  Min.   :-73.95   Min.   :-0.067000   Min.   :1.758   Min.   :15.70  
##  1st Qu.:-42.01   1st Qu.:-0.016000   1st Qu.:2.023   1st Qu.:23.90  
##  Median :-32.25   Median :-0.007000   Median :2.099   Median :27.60  
##  Mean   :-30.98   Mean   :-0.008883   Mean   :2.102   Mean   :26.93  
##  3rd Qu.:-19.48   3rd Qu.: 0.002000   3rd Qu.:2.181   3rd Qu.:30.70  
##  Max.   : 25.13   Max.   : 0.089000   Max.   :2.387   Max.   :36.30  
##    phi.core.          k.core            Facies         
##  Min.   :0.1570   Min.   :    0.42   Length:819        
##  1st Qu.:0.2390   1st Qu.:  657.33   Class :character  
##  Median :0.2760   Median : 1591.22   Mode  :character  
##  Mean   :0.2693   Mean   : 2251.91                     
##  3rd Qu.:0.3070   3rd Qu.: 3046.82                     
##  Max.   :0.3630   Max.   :15600.00