Set my working directory

setwd("J:\\2016-09-22 IHS Vs Navport Comparison")

Don’t format plots using scientific format

options(scipen=10)

Read in Treatement Summary and Test Perforation Tab from xlsx

WellHeader= read.xlsx("IHS Well Workbook.xlsx", sheet=1, colNames = T)
IHS_Comp= read.xlsx("IHS Well Workbook.xlsx", sheet=8, colNames = T)
LatLength= read.xlsx("IHS Well Workbook.xlsx", sheet=9, colNames = T)

Combine Proppant and Fluid Columns into two seperate columns

IHS_Reshape= IHS_Comp%>% gather(key="UWI","Proppant Amount",5:18)
IHS_Reshape= IHS_Reshape%>% gather(key="UWI", "Fluid Amount",6:18)

Change name to Proppant Type and Fluid Type

colnames(IHS_Reshape)[c(8,10)]= c("Proppant Type(Lbs)","Fluid Type(Gals)")

Filter Horizontals using Well Header Table

WellHeader=WellHeader[,c(1,14)]

Merge Table and filter to Horizontal

IHS_Reshape= left_join(IHS_Reshape, WellHeader, by="UWI")
IHS_Reshape= IHS_Reshape%>% filter(Hole.Direction=="HORIZONTAL")

Filter out Acid jobs and proppant or fluid amounts equal to 0

IHS_Reshape= IHS_Reshape%>% filter(`Fluid Type(Gals)` !="Acid.(Gals)")%>% 
  filter(`Proppant Amount`>0& `Fluid Amount`>0)

Rename Proppant and Fluid variables(factors) using mapvalues-plyr package

unique(IHS_Reshape$`Proppant Type(Lbs)`)
## [1] "Proppant.-.Sand.(lbs)"                            
## [2] "Proppant.-.Resin.Coated.Sand.(lbs)"               
## [3] "Proppant.-.Sand,.Ceramic,.Resin.Coated.Sand.(lbs)"
## [4] "Proppant.-.Ceramic.(lbs)"                         
## [5] "Proppant.-.Sand,.Ceramic.(lbs)"                   
## [6] "Proppant.-.Resin.Coated.Ceramic.(lbs)"            
## [7] "Proppant.-.Sand,.Resin.Coated.Ceramic.(lbs)"
IHS_Reshape$`Proppant Type(Lbs)`=plyr::mapvalues(IHS_Reshape$`Proppant Type(Lbs)`,
from= c("Proppant.-.Sand.(lbs)","Proppant.-.Resin.Coated.Sand.(lbs)","Proppant.-.Sand,.Ceramic,.Resin.Coated.Sand.(lbs)",
  "Proppant.-.Ceramic.(lbs)","Proppant.-.Sand,.Ceramic.(lbs)",
  "Proppant.-.Resin.Coated.Ceramic.(lbs)","Proppant.-.Sand,.Resin.Coated.Ceramic.(lbs)"),
  to= c("Sand","Resin_Coated","Ceramic_Resin_Coated","Ceramic","Ceramic",
  "Ceramic_Resin_Coated","Ceramic_Resin_Coated"))

unique(IHS_Reshape$`Fluid Type(Gals)`)
## [1] "Fluid/Water.(Gals)"         "Fluid.-.Slick.Water.(Gals)"
## [3] "Fluid.-.Salt.Water.(Gals)"  "Fluid.-.Foam.(Gals)"       
## [5] "Fluid.-.Surfactant.(Gals)"  "Gel/x-link.(Gals)"
IHS_Reshape$`Fluid Type(Gals)`= plyr::mapvalues(IHS_Reshape$`Fluid Type(Gals)`, 
  from= c("Fluid/Water.(Gals)",
  "Fluid.-.Slick.Water.(Gals)","Fluid.-.Salt.Water.(Gals)","Fluid.-.Foam.(Gals)",
  "Fluid.-.Oil.(Gals)", "Fluid.-.Surfactant.(Gals)","Gel/x-link.(Gals)","Fluid.-.Other.(Gals)"),
  to=c("Water","Slick Water","Salt Water","Foam","Oil Fluid","Surfactant","Gel/X-Link","Other"))
## The following `from` values were not present in `x`: Fluid.-.Oil.(Gals), Fluid.-.Other.(Gals)

Filter to Horizontal Wells Only

LatLength= left_join(LatLength, WellHeader, by="UWI")
LatLength= LatLength%>% filter(Hole.Direction=="HORIZONTAL")

Calculate Lateral Length using LatLengthGrossPerfInterval Column

LatLength$Depth.Base= as.numeric(LatLength$Depth.Base)
LatLength$Depth.Top= as.numeric(LatLength$Depth.Top)
LatLength= LatLength%>% group_by((UWI))%>% 
  mutate(LateralLength= max(Depth.Base)-min(Depth.Top)) 

Remove Duplicate Column UWI

LatLength= LatLength[,c(1:11,13)]

Convert Proppant Amount to 1,000’s lbs

IHS_Reshape$`Proppant Amount`= IHS_Reshape$`Proppant Amount`/1000
IHS_Reshape$`Fluid Amount`= IHS_Reshape$`Fluid Amount`/1000

Remove outliers in proppant amount

PropF= IHS_Reshape%>% filter(IHS_Reshape$`Proppant Amount`<25000&
IHS_Reshape$`Proppant Amount`>4000)

A more sophisticated box plot which shows all points, the mean value and a violin plot highlighting the density of distribution

pirateplot(formula= `Proppant Amount`~`Fluid Type(Gals)`, data=PropF,
  gl.col="gray", cex.axis = .75, cex.lab=.75)

pirateplot(formula= `Fluid Amount`~`Proppant Type(Lbs)`, data=PropF,
           gl.col="gray", cex.axis = .75, cex.lab=.75)


Looks like we need to filter out our fluid amount data as well. Now let’s look at frac data taken from Navport Read in the file

Navport= read.csv("HzOnly2014_Current_Navport.csv", header=T,sep=",")

Format fracture date to a date format r will recognize Convert factor date column with Hour&Minutes to date format

Navport$FRACTUREDATE=format(as.POSIXct(Navport$FRACTUREDATE,format='%m/%d/%Y'),format="%Y-%m-%d")
Navport$FRACTUREDATE= as.Date(Navport$FRACTUREDATE)