# Load the raw data set.
# NOTE(review): the working directory is hard-coded to an absolute path, so the
# script only runs where /cloud/project/datos exists; setwd() is kept because
# later code may rely on the working directory it sets.
setwd("/cloud/project/datos")
# The file is semicolon-separated with "." as decimal mark and a header row.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
datos <- read.csv("Petroleo_Ontaro.csv", header = TRUE, dec = ".", sep = ";")
# ANÁLISIS VARIABLE ORDINAL: Precisión de localización
library(RColorBrewer)
# Extract the variable under study.
location_accuracy <- datos$LOCATION_ACCURACY
# Raw frequency table, most frequent category first.
tabla_cruda_location_accuracy <- sort(table(location_accuracy), decreasing = TRUE)
top_n_location_accuracy <- 5
# head() returns at most top_n names. Indexing with `[1:top_n]` would pad the
# result with NA when the column has fewer than top_n distinct values, and
# `%in%` below would then match (and keep) the missing observations.
top_valores_location_accuracy <- head(
  names(tabla_cruda_location_accuracy),
  top_n_location_accuracy
)
# Keep only the observations that belong to the most frequent categories.
location_accuracy_agrupado <- location_accuracy[location_accuracy %in% top_valores_location_accuracy]
tabla_agrupada_location_accuracy <- table(location_accuracy_agrupado)
# Order the kept categories by descending frequency.
orden_location_accuracy <- names(sort(tabla_agrupada_location_accuracy, decreasing = TRUE))
tabla_agrupada_location_accuracy <- tabla_agrupada_location_accuracy[orden_location_accuracy]
# FREQUENCY TABLE
# Absolute counts (ni) of the kept categories, as a plain numeric vector.
conteos_location_accuracy <- as.numeric(tabla_agrupada_location_accuracy)
cuerpo_location_accuracy <- data.frame(
  LOCATION_ACCURACY = names(tabla_agrupada_location_accuracy),
  ni = conteos_location_accuracy
)
# Relative frequency in percent, computed over the kept categories only
# (not over the whole data set) and rounded to two decimals.
cuerpo_location_accuracy$hi_porcentaje <- round(
  (conteos_location_accuracy / sum(conteos_location_accuracy)) * 100,
  2
)
# Summary row appended at the bottom; its percentage is fixed at 100.
fila_total_location_accuracy <- data.frame(
  LOCATION_ACCURACY = "TOTAL",
  ni = sum(conteos_location_accuracy),
  hi_porcentaje = 100
)
Tabla_Location_Accuracy <- rbind(
  cuerpo_location_accuracy,
  fila_total_location_accuracy
)
print(Tabla_Location_Accuracy)
## LOCATION_ACCURACY ni hi_porcentaje
## 1 Within 50 metres 8359 36.59
## 2 Within 20 metres 6273 27.46
## 3 Within 200 metres 4005 17.53
## 4 Within 100 metres 2128 9.31
## 5 Within 5 metres 2081 9.11
## 6 TOTAL 22846 100.00
# PLOTS
# One "Set2" colour per category in the grouped table.
colores_location_accuracy <- brewer.pal(
  length(tabla_agrupada_location_accuracy),
  "Set2"
)

# 1. Absolute frequencies, default y-axis range.
barplot(
  tabla_agrupada_location_accuracy,
  col = colores_location_accuracy,
  main = "Distribución de Precisión de localización",
  xlab = "LOCATION_ACCURACY",
  ylab = "Cantidad",
  las = 1,
  cex.names = 0.6,
  cex.axis = 0.6
)

# 2. Absolute frequencies with an explicit y-axis range that ends 5 units
#    above the tallest bar.
barplot(
  tabla_agrupada_location_accuracy,
  col = colores_location_accuracy,
  main = "Frecuencia de Precisión de localización",
  xlab = "LOCATION_ACCURACY",
  ylab = "Cantidad",
  ylim = c(0, max(tabla_agrupada_location_accuracy) + 5),
  las = 1,
  cex.names = 0.6,
  cex.axis = 0.6
)

# Rows of the frequency table that hold real categories (everything but the
# TOTAL summary row); shared by both relative-frequency plots below.
sin_total_location_accuracy <- Tabla_Location_Accuracy$LOCATION_ACCURACY != "TOTAL"

# 3. Relative frequency (%), y axis scaled to the data.
barplot(
  Tabla_Location_Accuracy$hi_porcentaje[sin_total_location_accuracy],
  names.arg = Tabla_Location_Accuracy$LOCATION_ACCURACY[sin_total_location_accuracy],
  col = colores_location_accuracy,
  main = "Frecuencia relativa de Precisión de localización (local)",
  xlab = "LOCATION_ACCURACY",
  ylab = "Porcentaje",
  las = 1,
  cex.names = 0.6,
  cex.axis = 0.6
)

# 4. Relative frequency (%), y axis fixed to the full 0-100 range.
barplot(
  Tabla_Location_Accuracy$hi_porcentaje[sin_total_location_accuracy],
  names.arg = Tabla_Location_Accuracy$LOCATION_ACCURACY[sin_total_location_accuracy],
  col = colores_location_accuracy,
  main = "Frecuencia relativa de Precisión de localización (global)",
  xlab = "LOCATION_ACCURACY",
  ylab = "Porcentaje",
  ylim = c(0, 100),
  las = 1,
  cex.names = 0.6,
  cex.axis = 0.6
)

# 5. Pie chart
# Slice sizes and percentage labels come from the frequency table, skipping
# the TOTAL summary row.
pie_data_location_accuracy <- Tabla_Location_Accuracy$ni[Tabla_Location_Accuracy$LOCATION_ACCURACY != "TOTAL"]
pie_percent_location_accuracy <- Tabla_Location_Accuracy$hi_porcentaje[Tabla_Location_Accuracy$LOCATION_ACCURACY != "TOTAL"]
etiquetas_pie_location_accuracy <- paste0(pie_percent_location_accuracy, "%")
# brewer.pal() needs n >= 3, so request at least three colours...
n_colores_pie_location_accuracy <- max(3, length(pie_data_location_accuracy))
# ...but keep exactly one colour per slice, built once, so the pie and its
# legend always use the same colours even with fewer than three categories.
colores_pie_location_accuracy <- head(
  brewer.pal(n = n_colores_pie_location_accuracy, name = "Set3"),
  length(pie_data_location_accuracy)
)
pie(pie_data_location_accuracy,
    labels = etiquetas_pie_location_accuracy,
    main = "Distribución porcentual de Precisión de localización",
    col = colores_pie_location_accuracy,
    radius = 0.7, cex = 0.8)
# The legend anchor (x = 1.2) can fall outside the pie's plot region, where
# drawing is clipped by default; xpd = TRUE lets it be drawn in the margin.
legend(x = 1.2, y = -0.3,
       legend = Tabla_Location_Accuracy$LOCATION_ACCURACY[Tabla_Location_Accuracy$LOCATION_ACCURACY != "TOTAL"],
       fill = colores_pie_location_accuracy,
       title = "Precisión de localización",
       cex = 0.5,
       xpd = TRUE)

# Conclusion
# Narrative summary of the analysis, stored as a single character string.
# The text is assembled from short pieces only to keep source lines readable;
# the resulting value is one uninterrupted paragraph.
conclusion <- paste0(
  "El análisis de la variable Precisión de Localización muestra cómo se ",
  "distribuyen las categorías de precisión. ",
  "Se ha evidenciado que la mayoría de los registros tienen una alta ",
  "precisión, especialmente dentro de los 50 metros. ",
  "Esta precisión es crucial para la planificación exacta de las ",
  "perforaciones, ya que permite ubicar con mayor exactitud los puntos de ",
  "perforación en proyectos de extracción de petróleo."
)