Production Performance
Clustering Analysis
Closing Comments
Chunk Options included
rm(list = ls())
setwd("C:/Users/Dario/Documents/IHS/atascosa_r_project")
list.of.packages <- c("dplyr", "tidyr", "lubridate", "stringr", "ggplot2",
"data.table", "DT", "reshape2", "cowplot", "stargazer")
new.packages <- list.of.packages[!(list.of.packages %in%
installed.packages()[,"Package"])]
if (length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, function(x){suppressMessages(
library(x, character.only = TRUE))})
This analysis will focus only on wells drilled within the county Atascosa, and only Oil producers with horizontal lateral length, actively producing, and non-Multi Wells as per IHS definition.
The restricted list is chosen for the purpose of proving the workflow and is being done using the Header file which is the main static wells’ data
dfh <- tbl_df(read.table(file = "Header.csv",
header = T, sep = ",", na.strings = c(NA, ""),
colClasses = c('character')
)
)
#dfh <- dfh[grep('^PEELER RANCH', dfh$lease_name),]
dfh <- dfh[-grep('MULTI', dfh$prod_id), ] %>%
filter(formtn_code == "605EGFD" &
county_name == "ATASCOSA" &
direc_drill_flag == "H" &
prod_code == "O" &
mode_code == "A"
)
dfh <- dfh[,c(1,8,15,16,17,19,20,28,30,31,39,40)]
The Production file contains all dynamic data as Oil, Gas and Water produced on a monthly basis. Based on this data, and merging with the Header data, we will be calculating some useful parameters,
variables, which will be used in the exploratory analysis, denoising, modeling, and statistical inference and machine learning algorithms
# Prod_ID, Date Production, Liquid (BBLS), Gas (MMCF), Water (BBLS)
dfp <- tbl_df(read.table(file = "Production.csv",
header = T, sep = ",", na.strings = c(NA, ""),
colClasses = c(rep('character',3), rep('numeric',6))
)
)[,c(1,3:6)]
Creation of tidy data as it is defined in the document by Hadley Wickman
dfp <- dfp[-grep('MULTI', dfp$prod_id),] %>%
mutate(dom = as.numeric(str_sub(date_p, 7, 8)),
mnth = as.numeric(str_sub(date_p, 5, 6)),
yr = as.numeric(str_sub(date_p, 1, 4))
) %>%
select(prod_id, dom, mnth, yr, liquid_bbls, gas_mmcf, water_bbls) %>%
filter(!((liquid_bbls == 0) & (gas_mmcf == 0) & (water_bbls == 0))) %>%
group_by(prod_id) %>%
mutate(
mnths_count = n(),
cum_days = cumsum(dom),
gor = round((gas_mmcf/liquid_bbls)*10^3, digits = 0),
cum_gor = round(cumsum(gor), digits = 0),
wor = round(water_bbls/liquid_bbls, digits = 1),
cum_wor = round(cumsum(wor), digits = 1),
boe = round(liquid_bbls + (gas_mmcf/6000), digits = 2),
cum_boe = round(cumsum(boe), digits = 0)
) %>%
arrange(prod_id, yr, mnth) %>%
gather(unit, volume, -prod_id, -dom, -mnth, -yr,
-mnths_count, -cum_days,
-gor, -cum_gor, -wor, -cum_wor, -boe, -cum_boe)
levels(dfp$unit) <- c(levels(dfp$unit), 'oil_bbls')
dfp$unit[dfp$unit == 'liquid_bbls'] <- 'oil_bbls'
dfp$unit <- factor(dfp$unit, levels = c('oil_bbls', 'gas_mmcf', 'water_bbls'))
Well header and Monthly production are merged by using common paramenter 'prod_id' which is unique identifier as defined by IHS organization https://www.ihs.com/index.html
# work in progress
df_hp <- merge(dfh, dfp, by = "prod_id")
df_hp <- df_hp %>%
mutate(oper_well_name =
paste(paste(str_sub(oper_name, 1, 3), lease_name), well_number)
) %>%
select(prod_id, oper_well_name, lease_name, well_number,
yr, mnth, dom,
latitude, longitude,
total_depth, uppr_perf, lowr_perf, true_vdepth,
volume, unit, mnths_count, cum_days,
gor, cum_gor, wor, cum_wor, boe, cum_boe) %>%
arrange(prod_id, yr, mnth) %>%
group_by(prod_id, unit) %>%
mutate(
sqrt_days = sqrt(cum_days),
cum_volume = cumsum(volume),
rate_phase = (round(volume/dom, digits = 0)),
rate_max_phase = round(max(rate_phase), digits = 0),
rate_inv_phase = 1/rate_phase,
mbt_phase = cum_volume/rate_phase,
mbt_sqrt_phase = sqrt(mbt_phase)
)
df_hp <- tbl_df(as.data.frame(df_hp))
# Changing long PEELER RANGE/SMEC name to PR/PS - for easier mapping display
df_hp$oper_well_name <- gsub("PEELER RANCH", "PR", df_hp$oper_well_name)
df_hp$oper_well_name <- gsub("PEELER SMEC", "PS", df_hp$oper_well_name)
write.csv(df_hp[grep("PR | PS ", df_hp$oper_well_name),],
file = "Atascosa-Oil-EagleFord-Header-Production.csv",
row.names = FALSE)
We will make a copy of df_hp dataframe into a new variable data, then all data denoising, statistical analysis and ML will be using this particular dataframe
data <- df_hp %>%
filter(unit == "oil_bbls" & str_detect(lease_name, "PEELER")) %>%
mutate(lateral_length = as.integer(lowr_perf) -
as.integer(uppr_perf)) %>%
select(
oper_well_name, latitude, longitude, cum_days,
total_depth, true_vdepth, lowr_perf, uppr_perf, lateral_length,
rate_max_phase, cum_volume, cum_gor, cum_wor, cum_boe
) %>%
filter(oper_well_name != "ARG PR B 2H") %>%
arrange(oper_well_name) %>% group_by(oper_well_name) %>%
mutate(cum_days_max = max(cum_days))
data <- data[data$cum_days_max == data$cum_days,c(1:15)]
Azimuthal Orientation and Toe Up/Down inclination added for Modeling
# toe inclination: 0: toe up; 1: toe down;
# toe up is more efficient for pump operation
azi <- tbl_df(read.table(file = "azimuthal_toe_incline.csv",
header = T, sep = ",", na.strings = c(NA, ""),
colClasses = c('character', rep('numeric', 3))
)
)
# converting toe inclination as a factor
azi[,3] <- lapply(azi[,3] , factor)
# substituting cum days withh cum days max
data <- data[,c(1:3,15,5:14)]
# merging azimutal/toe info with dataframe 'data'
data <- merge(data, azi[,-4], by = "oper_well_name")
# conversion of character strings variables to numeric
data[,c(2:15)] <- lapply(data[,c(2:15)], as.numeric)
DT is a Javascript interface to display tabular data
Display Table with Wells in PEELER ATASCOSA
Exploratory Analysis using ggplot2 - Multiple Linear Regression Model
max_cum_volume <- max(data$cum_volume)
max_rate_max_phase <- max(data$rate_max_phase)
max_cum_boe <- max(data$cum_boe)
row.names(data) <- data$oper_well_name
data$oper_well_name <- NULL
ggplot(data, aes(x=cum_boe, y=rate_max_phase,
col=cum_gor, size=cum_wor)) + geom_point()
Creating a Multiple Linear Regression Model
##
## Call:
## lm(formula = rate_max_phase ~ lateral_length + azimuthal + cum_gor +
## cum_wor + true_vdepth, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -262.76 -117.66 12.90 73.79 445.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.470e+03 2.865e+03 -0.513 0.6181
## lateral_length 1.331e-01 1.106e-01 1.204 0.2538
## azimuthal -1.447e+00 5.163e+00 -0.280 0.7845
## cum_gor -3.980e-03 2.111e-03 -1.885 0.0861 .
## cum_wor -2.098e+00 4.012e+00 -0.523 0.6113
## true_vdepth 1.544e-01 2.652e-01 0.582 0.5722
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 198.7 on 11 degrees of freedom
## Multiple R-squared: 0.5365, Adjusted R-squared: 0.3259
## F-statistic: 2.547 on 5 and 11 DF, p-value: 0.0912
##
##
## Summary Statistics
## ======================================================
## Estimate Std. Error t value Pr(> | t| )
## ------------------------------------------------------
## (Intercept) -1,469.9 2,865.3 -0.5 0.6
## lateral_length 0.1 0.1 1.2 0.3
## azimuthal -1.4 5.2 -0.3 0.8
## cum_gor -0.004 0.002 -1.9 0.1
## cum_wor -2.1 4.0 -0.5 0.6
## true_vdepth 0.2 0.3 0.6 0.6
## ------------------------------------------------------