Introducción
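Análisis de la distribución de frecuencias de la variable `Status` del dataset petrolero y contraste de su ajuste a un modelo geométrico mediante la prueba chi-cuadrado de Pearson.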
Configuración y Carga de Datos
library(readxl)
library(dplyr)
library(ggplot2)
library(gt)
library(scales)
library(knitr)
# Load the Excel workbook into `datos` and preview its first six rows.
datos <- readxl::read_excel(path = "dataset_mundial_petro.xlsx")
head(x = datos, n = 6L)
## # A tibble: 6 × 23
## `Unit ID` `Unit Name` Unit name local scri…¹ `Fuel type` `Unit type` Country
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 OG0000001 Matzen <NA> oil and gas field Austria
## 2 OG0000002 Abalone Abalone oil and gas field Brazil
## 3 OG0000006 Aguilhada Aguilhada oil and gas field Brazil
## 4 OG0000007 Agulha Agulha oil and gas field Brazil
## 5 OG0000008 Albacora Albacora oil and gas field Brazil
## 6 OG0000009 Albacora Les… Albacora Leste oil and gas field Brazil
## # ℹ abbreviated name: ¹`Unit name local script`
## # ℹ 17 more variables: `Subnational unit (province, state)` <chr>,
## # Latitude <dbl>, Longitude <dbl>, `Location accuracy` <chr>, Status <chr>,
## # `Status year` <dbl>, `Discovery year` <dbl>, `FID Year` <chr>,
## # `Production start year` <chr>, Operator <chr>, Owner <chr>, Parent <chr>,
## # Basin <chr>, `Concession / block` <chr>, `Project or complex` <chr>,
## # `Government unit ID` <chr>, `Wiki URL` <chr>
Tabla de Distribución de Frecuencias
# Frequency distribution of Status: absolute count, relative frequency and
# percentage for every factor level (levels without observations are kept).
# The counts are tabulated once and reused for the total.
conteo_status <- table(status$Status)
total_status <- sum(conteo_status)

# With a total of zero the relative frequencies would be 0 / 0 = NaN and every
# later result would silently turn into NA, so the problem is reported here.
if (total_status == 0) {
  warning(
    "Ninguna observación coincide con los niveles de Status; ",
    "las frecuencias relativas no están definidas.",
    call. = FALSE
  )
}

tabla <- data.frame(
  Estado = levels(status$Status),
  Frecuencia = as.vector(conteo_status)
) %>%
  mutate(
    Frecuencia_Relativa = Frecuencia / total_status,
    Porcentaje = Frecuencia_Relativa * 100
  )
tabla
## Estado Frecuencia Frecuencia_Relativa Porcentaje
## 1 Exploration 0 NaN NaN
## 2 Discovery 0 NaN NaN
## 3 Development 0 NaN NaN
## 4 Operating 0 NaN NaN
## 5 Closed 0 NaN NaN
## 6 Decommissioned 0 NaN NaN
## 7 Abandoned 0 NaN NaN
## 8 Underground Storage 0 NaN NaN
## 9 Cancelled 0 NaN NaN
# Render the frequency table with gt.
gt(tabla)
| Estado | Frecuencia | Frecuencia_Relativa | Porcentaje |
|---|---|---|---|
| Exploration | 0 | NaN | NaN |
| Discovery | 0 | NaN | NaN |
| Development | 0 | NaN | NaN |
| Operating | 0 | NaN | NaN |
| Closed | 0 | NaN | NaN |
| Decommissioned | 0 | NaN | NaN |
| Abandoned | 0 | NaN | NaN |
| Underground Storage | 0 | NaN | NaN |
| Cancelled | 0 | NaN | NaN |
Análisis Gráfico
# Bar chart of the absolute frequency of each Status category.
# A text column (or an alphabetically-levelled factor) would be drawn in
# alphabetical order; re-levelling `Estado` by its row order keeps the bars in
# the same order as the frequency table. The axis titles are set explicitly so
# they still read "Estado" / "Frecuencia" after the mapping change.
ggplot(
  tabla,
  aes(x = factor(Estado, levels = unique(Estado)), y = Frecuencia)
) +
  geom_col() +
  labs(x = "Estado", y = "Frecuencia") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Conjetura del Modelo Probabilístico
# Estimate of the geometric parameter from the sample mean: p = 1 / mean.
# NOTE(review): `status_num` is defined outside this section; presumably it
# holds the numeric category codes of Status -- confirm.
# Missing codes are dropped (as table() does when counting), because a single
# NA would otherwise turn the mean, and everything derived from it, into NA.
if (all(is.na(status_num))) {
  warning(
    "status_num no contiene valores válidos; ",
    "la media y p no se pueden estimar.",
    call. = FALSE
  )
}
media_status <- mean(status_num, na.rm = TRUE)
p <- 1 / media_status
media_status
## [1] NA
# Show the estimated geometric parameter.
print(p)
## [1] NA
Frecuencias Esperadas
# Expected frequencies under the geometric model with parameter `p`.
# Categories are numbered 1..K following the row order of `tabla`.
# seq_along() replaces 1:length(): on an empty table it yields no categories
# instead of the sequence c(1, 0).
k <- seq_along(tabla$Frecuencia)

# dgeom() is parameterised on the number of failures (0, 1, ...), hence the
# shift k - 1 to evaluate the categories 1, 2, ...
prob_geo <- dgeom(k - 1, p)

# NOTE(review): only the first K terms of the geometric distribution are used,
# so `prob_geo` sums to less than 1 and the expected counts add up to less than
# the observed total. Consider folding the tail mass into the last category --
# confirm against the intended test design.
esperadas <- prob_geo * sum(tabla$Frecuencia)

# Observed vs. expected counts; the expected column is rounded for display.
tabla_modelo <- data.frame(
  Categoria = k,
  Observada = tabla$Frecuencia,
  Esperada = round(esperadas, 2)
)
tabla_modelo
## Categoria Observada Esperada
## 1 1 0 NA
## 2 2 0 NA
## 3 3 0 NA
## 4 4 0 NA
## 5 5 0 NA
## 6 6 0 NA
## 7 7 0 NA
## 8 8 0 NA
## 9 9 0 NA
Test de Pearson
# Pearson chi-square statistic: sum over categories of (O - E)^2 / E.
# The unrounded expected counts (`esperadas`) are used instead of the display
# column `tabla_modelo$Esperada`, which is rounded to two decimals: rounding
# distorts the contribution of small cells and turns any expected count below
# 0.005 into a division by zero.
pearson <- sum((tabla_modelo$Observada - esperadas)^2 / esperadas)
pearson
## [1] NA
Test Chi-cuadrado
# Critical value: 0.95 quantile of the chi-square distribution, with degrees
# of freedom equal to the number of categories minus 2.
gl <- NROW(tabla_modelo) - 2
chi_critico <- qchisq(0.95, df = gl, lower.tail = TRUE)
print(chi_critico)
## [1] 14.06714
Tabla Resumen
# Decision of the goodness-of-fit test: H0 is not rejected when the statistic
# is below the critical value.
# A plain if/else is used because both inputs are scalars, and a missing
# statistic or critical value is reported explicitly instead of producing an
# NA decision.
if (is.na(pearson) || is.na(chi_critico)) {
  resultado <- "No concluyente: estadístico no disponible"
} else if (pearson < chi_critico) {
  resultado <- "No se rechaza H0"
} else {
  resultado <- "Se rechaza H0"
}

# One-row summary of the test, rendered with gt.
resumen <- data.frame(
  Estadistico = pearson,
  Chi_Critico = chi_critico,
  Decision = resultado
)
resumen %>% gt()
| Estadistico | Chi_Critico | Decision |
|---|---|---|
| NA | 14.06714 | NA |
Probabilidades
# Model probability of each category, rounded to four decimals and rendered
# with gt.
probabilidades <- data.frame(Categoria = k, Probabilidad = round(prob_geo, digits = 4))
gt(probabilidades)
| Categoria | Probabilidad |
|---|---|
| 1 | NA |
| 2 | NA |
| 3 | NA |
| 4 | NA |
| 5 | NA |
| 6 | NA |
| 7 | NA |
| 8 | NA |
| 9 | NA |