Table of Content

Executive Summary

  1. Production Performance

  2. Clustering Analysis

  3. Closing Comments

Chunk Options included

Loading Libraries

rm(list = ls())
setwd("C:/Users/Dario/Documents/IHS/atascosa_r_project")
list.of.packages <- c("dplyr", "tidyr", "lubridate", "stringr", "ggplot2",
                      "data.table", "DT", "reshape2", "cowplot", "stargazer")
new.packages <- list.of.packages[!(list.of.packages %in% 
                                   installed.packages()[,"Package"])]
if (length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, function(x){suppressMessages(
                                     library(x, character.only = TRUE))})

Reading Header File

This analysis will focus only on wells drilled within the county Atascosa, and only Oil producers with horizontal lateral length, actively producing, and non-Multi Wells as per IHS definition.

The restricted list is chosen for the purpose of proving the workflow and is being done using the Header file which is the main static wells’ data

dfh <- tbl_df(read.table(file = "Header.csv", 
                         header = T, sep = ",", na.strings = c(NA, ""),
                         colClasses = c('character')
                         )
              )

Filtering down to Atascosa - EagleFord - Horizontal - Active Wells

#dfh <- dfh[grep('^PEELER RANCH', dfh$lease_name),]
dfh <- dfh[-grep('MULTI', dfh$prod_id), ] %>% 
  filter(formtn_code == "605EGFD" & 
         county_name == "ATASCOSA" & 
         direc_drill_flag == "H" & 
         prod_code == "O" & 
         mode_code == "A"
         )

dfh <- dfh[,c(1,8,15,16,17,19,20,28,30,31,39,40)]

Reading Production File

The Production file contains all dynamic data as Oil, Gas and Water produced on a monthly basis. Based on this data, and merging with the Header data, we will be calculating some useful parameters,
variables, which will be used in the exploratory analysis, denoising, modeling, and statistical inference and machine learning algorithms

#  Prod_ID, Date Production, Liquid (BBLS), Gas (MMCF), Water (BBLS)

dfp <- tbl_df(read.table(file = "Production.csv", 
                         header = T, sep = ",", na.strings = c(NA, ""), 
                         colClasses = c(rep('character',3), rep('numeric',6))
                         )
              )[,c(1,3:6)]

Creation of tidy data as it is defined in the document by Hadley Wickman

dfp <- dfp[-grep('MULTI', dfp$prod_id),] %>% 
  mutate(dom = as.numeric(str_sub(date_p, 7, 8)), 
         mnth = as.numeric(str_sub(date_p, 5, 6)), 
         yr = as.numeric(str_sub(date_p, 1, 4))
         ) %>% 
  select(prod_id, dom, mnth, yr, liquid_bbls, gas_mmcf, water_bbls) %>% 
  filter(!((liquid_bbls == 0) & (gas_mmcf == 0) & (water_bbls == 0))) %>% 
  group_by(prod_id) %>% 
  mutate(
    mnths_count = n(), 
    cum_days = cumsum(dom),
    gor = round((gas_mmcf/liquid_bbls)*10^3, digits = 0),
    cum_gor = round(cumsum(gor), digits = 0),
    wor = round(water_bbls/liquid_bbls, digits = 1),
    cum_wor = round(cumsum(wor), digits = 1), 
    boe = round(liquid_bbls + (gas_mmcf/6000), digits = 2), 
    cum_boe = round(cumsum(boe), digits = 0)
  ) %>% 
  arrange(prod_id, yr, mnth) %>% 
  gather(unit, volume, -prod_id, -dom, -mnth, -yr, 
         -mnths_count, -cum_days, 
         -gor, -cum_gor, -wor, -cum_wor, -boe, -cum_boe)

levels(dfp$unit) <- c(levels(dfp$unit), 'oil_bbls')
dfp$unit[dfp$unit == 'liquid_bbls'] <- 'oil_bbls'
dfp$unit <- factor(dfp$unit, levels = c('oil_bbls', 'gas_mmcf', 'water_bbls'))

Merging Data Frames - Header and Production

Well header and Monthly production are merged by using common paramenter 'prod_id' which is unique identifier as defined by IHS organization https://www.ihs.com/index.html

# work in progress

df_hp <- merge(dfh, dfp, by = "prod_id")
df_hp <- df_hp %>% 
         mutate(oper_well_name = 
                paste(paste(str_sub(oper_name, 1, 3), lease_name), well_number)
                ) %>% 
         select(prod_id, oper_well_name, lease_name, well_number, 
                yr, mnth, dom, 
                latitude, longitude, 
                total_depth, uppr_perf, lowr_perf, true_vdepth, 
                volume, unit, mnths_count, cum_days,
                gor, cum_gor, wor, cum_wor, boe, cum_boe) %>% 
         arrange(prod_id, yr, mnth) %>% 
         group_by(prod_id, unit) %>% 
         mutate(
                sqrt_days = sqrt(cum_days), 
                cum_volume = cumsum(volume), 
                rate_phase = (round(volume/dom, digits = 0)), 
                rate_max_phase = round(max(rate_phase), digits = 0), 
                rate_inv_phase = 1/rate_phase, 
                mbt_phase = cum_volume/rate_phase, 
                mbt_sqrt_phase = sqrt(mbt_phase)
               )

df_hp <- tbl_df(as.data.frame(df_hp))

# Changing long PEELER RANGE/SMEC name to PR/PS - for easier mapping display

df_hp$oper_well_name <- gsub("PEELER RANCH", "PR", df_hp$oper_well_name)
df_hp$oper_well_name <- gsub("PEELER SMEC", "PS", df_hp$oper_well_name)


write.csv(df_hp[grep("PR | PS ", df_hp$oper_well_name),], 
          file = "Atascosa-Oil-EagleFord-Header-Production.csv",
          row.names = FALSE)

Selecting dataframe subset ‘data’ for ML using Production Parameters

We will make a copy of df_hp dataframe into a new variable data, then all data denoising, statistical analysis and ML will be using this particular dataframe

data <- df_hp %>% 
        filter(unit == "oil_bbls" & str_detect(lease_name, "PEELER")) %>%
        mutate(lateral_length = as.integer(lowr_perf) - 
                                as.integer(uppr_perf)) %>% 
        select(
          oper_well_name, latitude, longitude, cum_days, 
          total_depth, true_vdepth, lowr_perf, uppr_perf, lateral_length, 
          rate_max_phase, cum_volume, cum_gor, cum_wor, cum_boe
          ) %>% 
        filter(oper_well_name != "ARG PR B 2H") %>% 
        arrange(oper_well_name) %>% group_by(oper_well_name) %>% 
        mutate(cum_days_max = max(cum_days))

data <- data[data$cum_days_max == data$cum_days,c(1:15)]

Adding Lateral Orientation - Merge with Production & Completion Parameters

Azimuthal Orientation and Toe Up/Down inclination added for Modeling

# toe inclination: 0: toe up; 1: toe down; 
#                     toe up is more efficient for pump operation

azi <- tbl_df(read.table(file = "azimuthal_toe_incline.csv", 
                         header = T, sep = ",", na.strings = c(NA, ""),
                         colClasses = c('character', rep('numeric', 3))
                         )
              )

# converting toe inclination as a factor

azi[,3] <- lapply(azi[,3] , factor)

# substituting cum days withh cum days max

data <- data[,c(1:3,15,5:14)]

# merging azimutal/toe info with dataframe 'data'

data <- merge(data, azi[,-4], by = "oper_well_name")

# conversion of character strings variables to numeric

data[,c(2:15)] <- lapply(data[,c(2:15)], as.numeric)

DT is a Javascript interface to display tabular data

Display Table with Wells in PEELER ATASCOSA

Exloratory Analysis using ggplot

Exploratory Analysis using ggplot2 - Multiple Linear Regression Model

max_cum_volume <- max(data$cum_volume)
max_rate_max_phase <- max(data$rate_max_phase)
max_cum_boe <- max(data$cum_boe)

row.names(data) <- data$oper_well_name
data$oper_well_name <- NULL

ggplot(data, aes(x=cum_boe, y=rate_max_phase, 
                 col=cum_gor, size=cum_wor)) + geom_point()

Multiple Linear Regression Model

Creating a Multiple Linear Regression Model

## 
## Call:
## lm(formula = rate_max_phase ~ lateral_length + azimuthal + cum_gor + 
##     cum_wor + true_vdepth, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -262.76 -117.66   12.90   73.79  445.67 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    -1.470e+03  2.865e+03  -0.513   0.6181  
## lateral_length  1.331e-01  1.106e-01   1.204   0.2538  
## azimuthal      -1.447e+00  5.163e+00  -0.280   0.7845  
## cum_gor        -3.980e-03  2.111e-03  -1.885   0.0861 .
## cum_wor        -2.098e+00  4.012e+00  -0.523   0.6113  
## true_vdepth     1.544e-01  2.652e-01   0.582   0.5722  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 198.7 on 11 degrees of freedom
## Multiple R-squared:  0.5365, Adjusted R-squared:  0.3259 
## F-statistic: 2.547 on 5 and 11 DF,  p-value: 0.0912
## 
## 
## Summary Statistics
## ======================================================
##                Estimate Std. Error t value Pr(> | t| )
## ------------------------------------------------------
## (Intercept)    -1,469.9  2,865.3    -0.5       0.6    
## lateral_length   0.1       0.1       1.2       0.3    
## azimuthal        -1.4      5.2      -0.3       0.8    
## cum_gor         -0.004    0.002     -1.9       0.1    
## cum_wor          -2.1      4.0      -0.5       0.6    
## true_vdepth      0.2       0.3       0.6       0.6    
## ------------------------------------------------------