library(PASWR)
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(knitr)
setwd("/cloud/project")
datos <- read_csv("point_oil-gas-other-regulated-wells-beginning-1860.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 42045 Columns: 52
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (38): Well Name, Company Name, Well Type, Map Symbol, Well Status, Stat...
## dbl (12): API Well Number, County Code, API Hole Number, Sidetrack, Complet...
## lgl (1): Financial Security
## dttm (1): Date Last Modified
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#—————————————————————————— # Tema: Estadística inferencial de variables cuantitativas continuas - Modelo Normal #——————————————————————————
Elevation_ft <- as.numeric(datos$`Elevation ft`)
## Warning: NAs introduced by coercion
Elevation_filtrada <- Elevation_ft[Elevation_ft >= 1000 & Elevation_ft <= 2000]
Elevation_filtrada <- na.omit(Elevation_filtrada)
n <- length(Elevation_filtrada)
n
## [1] 14102
normal_params <- fitdistr(Elevation_filtrada, "normal")
mu <- normal_params$estimate[1]
sigma <- normal_params$estimate[2]
mu
## mean
## 1515.88
sigma
## sd
## 251.8749
Histo_elev <- hist(Elevation_filtrada, freq = FALSE,
main = "Gráfica No.: Modelo de probabilidad - Normal",
xlab = "Elevación (ft)", ylab = "Densidad de probabilidad",
col = "lightblue")
# Secuencia para graficar la curva normal
x_norm <- seq(min(Elevation_filtrada), max(Elevation_filtrada), length.out = 1000)
plot(x_norm, dnorm(x_norm, mean = mu, sd = sigma), type = "l", col = "blue", lwd = 2,
main = "Gráfica No.: Modelo de probabilidad (Normal)",
ylab = "Densidad de probabilidad", xlab = "Elevación (ft)", las = 2)
# TEST DE PEARSON Y CHI-CUADRADO
Fo_norm <- Histo_elev$counts
h2 <- length(Fo_norm)
P_norm <- c()
for (i in 1:h2) {
P_norm[i] <- pnorm(Histo_elev$breaks[i+1], mean = mu, sd = sigma) -
pnorm(Histo_elev$breaks[i], mean = mu, sd = sigma)
}
Fe_norm <- P_norm * length(Elevation_filtrada)
plot(Fo_norm, Fe_norm, main = "Gráfica: Correlación de frecuencias en el modelo normal
de la elevación (ft)", xlab = "Frecuencia Observada",
ylab = "Frecuencia Esperada", col = "darkblue")
# Correlación
Correlacion_norm <- cor(Fo_norm, Fe_norm) * 100
Correlacion_norm
## [1] 93.93176
gl_norm <- h2 - 1
alpha <- 0.05
Fo_norm_pct <- (Fo_norm / n) * 100
Fe_norm_pct <- P_norm * 100
x2_norm <- sum((Fe_norm_pct - Fo_norm_pct)^2 / Fe_norm_pct)
x2_norm
## [1] 9.835572
chi_crit_norm <- qchisq(1 - alpha, gl_norm)
chi_crit_norm
## [1] 30.14353
x2_norm < chi_crit_norm
## [1] TRUE
Variable <- c("Elevación (ft)")
tabla_norm <- data.frame(Variable, round(Correlacion_norm,2), round(x2_norm,2), round(chi_crit_norm,2))
colnames(tabla_norm) <- c("Variable", "Test Pearson (%)", "Chi Cuadrado", "Umbral de aceptación")
kable(tabla_norm, format = "markdown", caption = "Tabla .: Resumen del test de bondad al modelo normal")
| Variable | Test Pearson (%) | Chi Cuadrado | Umbral de aceptación |
|---|---|---|---|
| Elevación (ft) | 93.93 | 9.84 | 30.14 |
prob_norm <- pnorm(1800, mean = mu, sd = sigma) - pnorm(1200, mean = mu, sd = sigma)
prob_norm * 100
## [1] 76.54442
plot(x_norm, dnorm(x_norm, mean = mu, sd = sigma), type = "l", col = "blue", lwd = 2,
main = "Gráfica No.: Cálculo de probabilidades (Normal)",
ylab = "Densidad de probabilidad", xlab = "Elevación (ft)", las = 2)
x_somb_norm <- seq(1200, 1800, length.out = 1000)
y_somb_norm <- dnorm(x_somb_norm, mean = mu, sd = sigma)
polygon(c(x_somb_norm, rev(x_somb_norm)),
c(y_somb_norm, rep(0, length(y_somb_norm))),
col = rgb(1, 0, 0, 0.4), border = NA)
legend("topright", legend = c("Modelo Normal", "Área de Probabilidad"),
col = c("blue", "pink"), lwd = 2, pch = c(NA, 15))
#—————————————————————————— # TEOREMA DEL LÍMITE CENTRAL PARA LA
VARIABLE ELEVACIÓN #——————————————————————————
media_elev <- mean(Elevation_filtrada)
media_elev
## [1] 1515.88
sigma_elev <- sd(Elevation_filtrada)
sigma_elev
## [1] 251.8838
error_estandar_elev <- sigma_elev / sqrt(n)
error_estandar_elev
## [1] 2.121094
limite_inferior <- media_elev - 2 * error_estandar_elev
limite_superior <- media_elev + 2 * error_estandar_elev
limite_inferior
## [1] 1511.638
limite_superior
## [1] 1520.122
tabla_media_normal <- data.frame(round(limite_inferior, 2),
round(media_elev, 2),
round(limite_superior, 2),
round(error_estandar_elev, 2))
colnames(tabla_media_normal) <- c("Límite inferior", "Media poblacional", "Límite superior", "Desviación estándar poblacional")
kable(tabla_media_normal, format = "markdown", caption = "Tabla .: Media poblacional estimada de la elevación (ft) para pozos regulados")
| Límite inferior | Media poblacional | Límite superior | Desviación estándar poblacional |
|---|---|---|---|
| 1511.64 | 1515.88 | 1520.12 | 2.12 |