# Tema: Variable cualitativa - Formación geológica
# Autor: Edison Arteaga
# Fecha: 14/4/2025
# Importar datos
# Load readr, which provides read_csv().
library(readr)
# Relative file paths used afterwards are resolved against this directory.
setwd(dir = "/cloud/project")
# Preview the raw file as a tibble; the result is not assigned to a variable.
read_csv(file = "point_oil-gas-other-regulated-wells-beginning-1860.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 42045 Columns: 52
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (38): Well Name, Company Name, Well Type, Map Symbol, Well Status, Stat...
## dbl (12): API Well Number, County Code, API Hole Number, Sidetrack, Complet...
## lgl (1): Financial Security
## dttm (1): Date Last Modified
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 42,045 × 52
## `API Well Number` `County Code` `API Hole Number` Sidetrack Completion
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3.10e13 1 1072 0 0
## 2 3.10e13 1 1073 0 0
## 3 3.10e13 1 21007 0 0
## 4 3.10e13 1 21008 0 0
## 5 3.10e13 1 21009 0 0
## 6 3.10e13 1 21010 0 0
## 7 3.10e13 1 21011 0 0
## 8 3.10e13 1 21014 0 0
## 9 3.10e13 1 21015 0 0
## 10 3.10e13 1 21016 0 0
## # ℹ 42,035 more rows
## # ℹ 47 more variables: `Well Name` <chr>, `Company Name` <chr>,
## # `Operator Number` <dbl>, `Well Type` <chr>, `Map Symbol` <chr>,
## # `Well Status` <chr>, `Status Date` <chr>, `Permit Application Date` <chr>,
## # `Permit Issued Date` <chr>, `Date Spudded` <chr>,
## # `Date of Total Depth` <chr>, `Date Well Completed` <chr>,
## # `Date Well Plugged` <chr>, `Date Well Confidentiality Ends` <chr>, …
# Load the wells dataset into a base data.frame.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
datos <- read.csv(
  "point_oil-gas-other-regulated-wells-beginning-1860.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)
# Geological formation: extract the variable of interest.
Formacion <- datos$Producing.Formation
# Absolute frequency of each distinct formation label.
TDF_Formacion <- table(Formacion)
TDF_Formacion
## Formacion
## - Akron
## 10115 39
## Bass Island Beekmantown Group
## 107 2
## Bertie Black River
## 2 156
## Bradford Bradford & Chipmunk
## 2015 372
## Bradford 1st & 2nd Bradford 1st & Chipmunk
## 3 8
## Bradford 2nd & 3rd Bradford Second
## 54 25
## Bradford Third Camillus
## 598 10
## Chipmunk Chipmunk & Bradford 2nd
## 2365 436
## Chipmunk & Bradford 3rd Chipmunk & Harrisburg
## 45 4
## Chipmunk Bradford 1st & 2nd Chipmunk Bradford 1st2nd3rd
## 82 76
## Chipmunk Bradford 2nd & 3rd Clarksville
## 625 140
## Clinton Confidential
## 3 31
## Devonian Shale Fordham Gneiss
## 6 26
## Fulmer Valley Glade
## 1956 737
## Grimsby Hamilton
## 307 20
## Harrisburg Helderberg
## 2 26
## Herkimer Herkimer-Oneida
## 67 5
## Inwood Marble Laona
## 6 1
## Little Falls Lockport
## 1 14
## Lorraine Manhattan Schist
## 1 35
## Manlius Marcellus
## 2 50
## Medina Medina-Queenston
## 8843 5
## NA - Geoexchange Niagara
## 5 25
## Nunda Oneida
## 1 35
## Oneida-Oswego Onondaga
## 11 202
## Onondaga-Bass Island Oriskany
## 2 609
## Oswego Penny
## 2 82
## Penny & Fulmer Valley Potsdam
## 241 5
## Precambrian Queenston
## 1 651
## Retsof Rhinestreet
## 1 21
## Rice Brook Rice Brook & Chipmunk
## 34 12
## Richburg Richburg-Waugh & Porter
## 8426 22
## Rochester Shale Salina
## 1 378
## Sauquoit Scio
## 1 641
## Sodus Shale Surficial
## 1 3
## Syracuse Theresa
## 109 39
## Trenton Tully
## 233 4
## Unknown Upper Devonian
## 12 494
## Upper Devonian Shale Utica
## 8 5
## Vernon Wappinger Limestone
## 26 2
## Waugh & Porter Whirlpool
## 249 25
## Yonkers Gneiss
## 3
# Turn the formation frequency table into a data frame: one row per
# formation label, with its count stored in the Freq column.
Tabla_Formacion <- as.data.frame(
  TDF_Formacion,
  responseName = "Freq",
  stringsAsFactors = TRUE
)
Tabla_Formacion
## Formacion Freq
## 1 - 10115
## 2 Akron 39
## 3 Bass Island 107
## 4 Beekmantown Group 2
## 5 Bertie 2
## 6 Black River 156
## 7 Bradford 2015
## 8 Bradford & Chipmunk 372
## 9 Bradford 1st & 2nd 3
## 10 Bradford 1st & Chipmunk 8
## 11 Bradford 2nd & 3rd 54
## 12 Bradford Second 25
## 13 Bradford Third 598
## 14 Camillus 10
## 15 Chipmunk 2365
## 16 Chipmunk & Bradford 2nd 436
## 17 Chipmunk & Bradford 3rd 45
## 18 Chipmunk & Harrisburg 4
## 19 Chipmunk Bradford 1st & 2nd 82
## 20 Chipmunk Bradford 1st2nd3rd 76
## 21 Chipmunk Bradford 2nd & 3rd 625
## 22 Clarksville 140
## 23 Clinton 3
## 24 Confidential 31
## 25 Devonian Shale 6
## 26 Fordham Gneiss 26
## 27 Fulmer Valley 1956
## 28 Glade 737
## 29 Grimsby 307
## 30 Hamilton 20
## 31 Harrisburg 2
## 32 Helderberg 26
## 33 Herkimer 67
## 34 Herkimer-Oneida 5
## 35 Inwood Marble 6
## 36 Laona 1
## 37 Little Falls 1
## 38 Lockport 14
## 39 Lorraine 1
## 40 Manhattan Schist 35
## 41 Manlius 2
## 42 Marcellus 50
## 43 Medina 8843
## 44 Medina-Queenston 5
## 45 NA - Geoexchange 5
## 46 Niagara 25
## 47 Nunda 1
## 48 Oneida 35
## 49 Oneida-Oswego 11
## 50 Onondaga 202
## 51 Onondaga-Bass Island 2
## 52 Oriskany 609
## 53 Oswego 2
## 54 Penny 82
## 55 Penny & Fulmer Valley 241
## 56 Potsdam 5
## 57 Precambrian 1
## 58 Queenston 651
## 59 Retsof 1
## 60 Rhinestreet 21
## 61 Rice Brook 34
## 62 Rice Brook & Chipmunk 12
## 63 Richburg 8426
## 64 Richburg-Waugh & Porter 22
## 65 Rochester Shale 1
## 66 Salina 378
## 67 Sauquoit 1
## 68 Scio 641
## 69 Sodus Shale 1
## 70 Surficial 3
## 71 Syracuse 109
## 72 Theresa 39
## 73 Trenton 233
## 74 Tully 4
## 75 Unknown 12
## 76 Upper Devonian 494
## 77 Upper Devonian Shale 8
## 78 Utica 5
## 79 Vernon 26
## 80 Wappinger Limestone 2
## 81 Waugh & Porter 249
## 82 Whirlpool 25
## 83 Yonkers Gneiss 3
# Group the variable into three size classes according to Freq:
# below 100, from 100 up to 999, and 1000 or more.
# The classification is vectorised over the whole Freq column, so it covers
# every row of Tabla_Formacion instead of a fixed number of rows.
Tabla_Formacion$Grupo <- ifelse(
  Tabla_Formacion$Freq < 100,
  "Formaciones geologicas pequeñas",
  ifelse(
    Tabla_Formacion$Freq < 1000,
    "Formaciones geologicas Mediana",
    "formaciones Geologicas grandes"
  )
)
# Number of formations that fall into each size class.
Grupo <- table(Tabla_Formacion$Grupo)
Grupo
##
## formaciones Geologicas grandes Formaciones geologicas Mediana
## 6 19
## Formaciones geologicas pequeñas
## 58
# Quick bar chart of the absolute counts per size class.
barplot(Grupo)
# Relative frequency of each class (each count divided by the total).
hi_grupo <- prop.table(Grupo)
# Sanity check: the relative frequencies should add up to 1.
sum(hi_grupo)
## [1] 1
hi_grupo
##
## formaciones Geologicas grandes Formaciones geologicas Mediana
## 0.07228916 0.22891566
## Formaciones geologicas pequeñas
## 0.69879518
# Express the relative frequencies as percentages rounded to two decimals.
hi_grupo <- round(hi_grupo * 100, digits = 2)
hi_grupo
##
## formaciones Geologicas grandes Formaciones geologicas Mediana
## 7.23 22.89
## Formaciones geologicas pequeñas
## 69.88
# Frequency distribution chart: "local" bar chart of the absolute counts
# held in Grupo.
barplot(
  height = Grupo,
  main = "Gráfica No. 3.1: Distribución de frecuencia de la formacion
geologica en la que se encuentran los pozos de hidrocarburos",
  xlab = "Formacion geologica",
  ylab = "Cantidad",
  col = "brown"
)
# Frequency distribution chart: "global" bar chart of the percentages held
# in hi_grupo, drawn on a fixed 0-100 vertical axis.
barplot(
  height = hi_grupo,
  main = "Gráfica No. 3.2: Distribución de frecuencia de la formacion
geologica en la que se encuentran los pozos de hidrocarburos",
  xlab = "Formacion Geologica",
  ylab = "Porcentaje(%)",
  ylim = c(0, 100),
  col = "brown"
)
# Pie chart of the relative frequencies (in percent).
# Each slice label is the percentage value followed by " %".
etiqueta <- paste(hi_grupo, "%", sep = " ")
pie(
  hi_grupo,
  labels = etiqueta,
  clockwise = TRUE,
  main = "Gráfica No.3.3:Distribución de frecuencia de la formacion
geologica en la que se encuentran los pozos de hidrocarburos",
  col = rainbow(length(hi_grupo))
)
# Legend entries and colours are both derived from hi_grupo, so every label
# stays paired with the colour of its own slice regardless of the order in
# which the classes appear in hi_grupo.
legend(
  "topright",
  legend = names(hi_grupo),
  cex = 0.5,
  pt.cex = 2,
  fill = rainbow(length(hi_grupo)),
  title = "Leyenda "
)