# *************UNIVERSIDAD CENTRAL DEL ECUADOR**************
# *****************FIGEMPA*******************
# **************CARRERA DE GEOLOGIA****************
# **************ESTADÍSTICA Y PROBABILIDAD ***************
# *****************GRUPO 2*******************
# Cargar los datos (Conjunto de datos)
library(PASWR)
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(knitr)
setwd("/cloud/project")
datos <- read_csv("point_oil-gas-other-regulated-wells-beginning-1860.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 42045 Columns: 52
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (38): Well Name, Company Name, Well Type, Map Symbol, Well Status, Stat...
## dbl (12): API Well Number, County Code, API Hole Number, Sidetrack, Complet...
## lgl (1): Financial Security
## dttm (1): Date Last Modified
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
setwd("/cloud/project")
datos <- read_csv("point_oil-gas-other-regulated-wells-beginning-1860.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 42045 Columns: 52
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (38): Well Name, Company Name, Well Type, Map Symbol, Well Status, Stat...
## dbl (12): API Well Number, County Code, API Hole Number, Sidetrack, Complet...
## lgl (1): Financial Security
## dttm (1): Date Last Modified
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#—————————————————————————— # Tema: Estadística inferencial de variables cuantitativas continuas #——————————————————————————
Espacio_acre <- as.numeric(datos$`Spacing Acres`)
## Warning: NAs introduced by coercion
Espacio_acres <- na.omit(Espacio_acre)
Espacio_filtrado <- Espacio_acres[Espacio_acres < 500]
n <- length(Espacio_filtrado)
n
## [1] 967
exp_params <- fitdistr(Espacio_filtrado, "exponential")
lambda <- exp_params$estimate
lambda
## rate
## 0.0108606
Histo_acres <- hist(Espacio_filtrado, freq = FALSE,
main = "Gráfica No.: Modelo de probabilidad - Exponencial",
xlab = "Espacio (Acres)", ylab = "Densidad de probabilidad",
col = "lightblue")
# Superponer la curva exponencial
x_exp <- seq(min(Espacio_filtrado), max(Espacio_filtrado), length.out = 1000)
plot(x_exp, dexp(x_exp, rate = lambda), type = "l", col = "blue", lwd = 2,
main = "Gráfica No.: Cálculo de probabilidades (Exponencial)",
ylab = "Densidad de probabilidad", xlab = "Espacio (Acres)", las = 2)
# TEST DE PEARSON Y CHI-CUADRADO
Fo_exp <- Histo_acres$counts
h2 <- length(Fo_exp)
P_exp <- c()
for (i in 1:h2) {
P_exp[i] <- pexp(Histo_acres$breaks[i+1], rate = lambda) - pexp(Histo_acres$breaks[i], rate = lambda)
}
Fe_exp <- P_exp * length(Espacio_filtrado)
sum(Fe_exp)
## [1] 962.7628
n
## [1] 967
plot(Fo_exp, Fe_exp, main = "Gráfica: Correlación de frecuencias en el modelo exponencial
del espacio de acres (filtrado)", xlab = "Frecuencia Observada",
ylab = "Frecuencia Esperada", col = "darkgreen")
# Correlación
Correlacion_exp <- cor(Fo_exp, Fe_exp) * 100
Correlacion_exp
## [1] 96.31829
gl_exp <- h2 - 1
gl_exp
## [1] 9
alpha <- 0.003
Fo_exp_pct <- (Fo_exp / n) * 100
Fe_exp_pct <- P_exp * 100
x2_exp <- sum((Fe_exp_pct - Fo_exp_pct)^2 / Fe_exp_pct)
x2_exp
## [1] 24.30378
chi_crit_exp <- qchisq(1 - alpha, gl_exp)
chi_crit_exp
## [1] 24.97407
x2_exp < chi_crit_exp
## [1] TRUE
Variable <- c("Espacio (Acres)")
tabla_exp <- data.frame(Variable, round(Correlacion_exp,2), round(x2_exp,2), round(chi_crit_exp,2))
colnames(tabla_exp) <- c("Variable", "Test Pearson (%)", "Chi Cuadrado", "Umbral de aceptación")
kable(tabla_exp, format = "markdown", caption = "Tabla .: Resumen del test de bondad al modelo exponencial")
| Variable | Test Pearson (%) | Chi Cuadrado | Umbral de aceptación |
|---|---|---|---|
| Espacio (Acres) | 96.32 | 24.3 | 24.97 |
prob_exp <- pexp(50, rate = lambda) - pexp(10, rate = lambda)
prob_exp * 100
## [1] 31.60987
plot(x_exp, dexp(x_exp, rate = lambda), type = "l", col = "blue", lwd = 2,
main = "Gráfica No.: Cálculo de probabilidades (Exponencial)",
ylab = "Densidad de probabilidad", xlab = "Espacio (Acres)", las = 2)
# Área sombreada entre 10 y 50 acres
x_exp <- seq(min(Espacio_filtrado), max(Espacio_filtrado), length.out = 1000)
plot(x_exp, dexp(x_exp, rate = lambda), type = "l", col = "blue", lwd = 2,
main = "Gráfica No.: Cálculo de probabilidades (Exponencial)",
ylab = "Densidad de probabilidad", xlab = "Espacio (Acres)", las = 2)
x_somb_exp <- seq(10, 50, length.out = 1000)
y_somb_exp <- dexp(x_somb_exp, rate = lambda)
polygon(c(x_somb_exp, rev(x_somb_exp)),
c(y_somb_exp, rep(0, length(y_somb_exp))),
col = rgb(1, 0, 0, 0.4), border = NA)
legend("topright", legend = c("Modelo Exponencial", "Área de Probabilidad"),
col = c("blue", "pink"), lwd = 2, pch = c(NA, 15))
axis(1, at = seq(0, max(Espacio_acres), by = 10), labels = seq(0, max(Espacio_acres), by = 10), las = 2)
#—————————————————————————— # TEOREMA DEL LÍMITE CENTRAL PARA LA VARIABLE ESPACIO (ACRES) #——————————————————————————
media_acres <- mean(Espacio_filtrado)
media_acres
## [1] 92.07591
sigma_acres <- sd(Espacio_filtrado)
sigma_acres
## [1] 87.05395
error_estandar_acres <- sigma_acres / sqrt(n)
error_estandar_acres
## [1] 2.799466
limite_inferior <- media_acres - 2 * error_estandar_acres
limite_superior <- media_acres + 2 * error_estandar_acres
limite_inferior
## [1] 86.47697
limite_superior
## [1] 97.67484
tabla_media_exp <- data.frame(round(limite_inferior, 2),
round(media_acres, 2),
round(limite_superior, 2),
round(error_estandar_acres, 2))
colnames(tabla_media_exp) <- c("Límite inferior", "Media poblacional", "Límite superior", "Desviación estándar poblacional")
kable(tabla_media_exp, format = "markdown", caption = "Tabla .: Media poblacional estimada del espacio en acres para pozos regulados")
| Límite inferior | Media poblacional | Límite superior | Desviación estándar poblacional |
|---|---|---|---|
| 86.48 | 92.08 | 97.67 | 2.8 |