# Tema: Variable cualitativa Formación geológica
# Autor: Edison Arteaga
# Fecha: 14/4/2025

# Importar datos

library(readr)
# Switch to the project directory so the relative CSV path below resolves.
setwd(file.path("/cloud", "project"))

# Parse the wells CSV with readr. The result is not assigned, so at top level
# it is only auto-printed (column specification plus a preview of the rows).
readr::read_csv(
  file = "point_oil-gas-other-regulated-wells-beginning-1860.csv"
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 42045 Columns: 52
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (38): Well Name, Company Name, Well Type, Map Symbol, Well Status, Stat...
## dbl  (12): API Well Number, County Code, API Hole Number, Sidetrack, Complet...
## lgl   (1): Financial Security
## dttm  (1): Date Last Modified
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 42,045 × 52
##    `API Well Number` `County Code` `API Hole Number` Sidetrack Completion
##                <dbl>         <dbl>             <dbl>     <dbl>      <dbl>
##  1           3.10e13             1              1072         0          0
##  2           3.10e13             1              1073         0          0
##  3           3.10e13             1             21007         0          0
##  4           3.10e13             1             21008         0          0
##  5           3.10e13             1             21009         0          0
##  6           3.10e13             1             21010         0          0
##  7           3.10e13             1             21011         0          0
##  8           3.10e13             1             21014         0          0
##  9           3.10e13             1             21015         0          0
## 10           3.10e13             1             21016         0          0
## # ℹ 42,035 more rows
## # ℹ 47 more variables: `Well Name` <chr>, `Company Name` <chr>,
## #   `Operator Number` <dbl>, `Well Type` <chr>, `Map Symbol` <chr>,
## #   `Well Status` <chr>, `Status Date` <chr>, `Permit Application Date` <chr>,
## #   `Permit Issued Date` <chr>, `Date Spudded` <chr>,
## #   `Date of Total Depth` <chr>, `Date Well Completed` <chr>,
## #   `Date Well Plugged` <chr>, `Date Well Confidentiality Ends` <chr>, …
# Load the wells CSV into the base data.frame `datos`, which the rest of the
# script works from. The header flag is spelled out as TRUE because `T` is an
# ordinary variable that can be reassigned, whereas TRUE is a reserved word.
datos <- read.csv(
  "point_oil-gas-other-regulated-wells-beginning-1860.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)

# Geological formation: extract the variable
# Pull the producing-formation column out of the wells data.
Formacion <- datos[["Producing.Formation"]]

# Absolute frequency of each distinct formation value, then display it.
TDF_Formacion <- table(Formacion)
TDF_Formacion
## Formacion
##                           -                       Akron 
##                       10115                          39 
##                 Bass Island           Beekmantown Group 
##                         107                           2 
##                      Bertie                 Black River 
##                           2                         156 
##                    Bradford         Bradford & Chipmunk 
##                        2015                         372 
##          Bradford 1st & 2nd     Bradford 1st & Chipmunk 
##                           3                           8 
##          Bradford 2nd & 3rd             Bradford Second 
##                          54                          25 
##              Bradford Third                    Camillus 
##                         598                          10 
##                    Chipmunk     Chipmunk & Bradford 2nd 
##                        2365                         436 
##     Chipmunk & Bradford 3rd       Chipmunk & Harrisburg 
##                          45                           4 
## Chipmunk Bradford 1st & 2nd Chipmunk Bradford 1st2nd3rd 
##                          82                          76 
## Chipmunk Bradford 2nd & 3rd                 Clarksville 
##                         625                         140 
##                     Clinton                Confidential 
##                           3                          31 
##              Devonian Shale              Fordham Gneiss 
##                           6                          26 
##               Fulmer Valley                       Glade 
##                        1956                         737 
##                     Grimsby                    Hamilton 
##                         307                          20 
##                  Harrisburg                  Helderberg 
##                           2                          26 
##                    Herkimer             Herkimer-Oneida 
##                          67                           5 
##               Inwood Marble                       Laona 
##                           6                           1 
##                Little Falls                    Lockport 
##                           1                          14 
##                    Lorraine            Manhattan Schist 
##                           1                          35 
##                     Manlius                   Marcellus 
##                           2                          50 
##                      Medina            Medina-Queenston 
##                        8843                           5 
##            NA - Geoexchange                     Niagara 
##                           5                          25 
##                       Nunda                      Oneida 
##                           1                          35 
##               Oneida-Oswego                    Onondaga 
##                          11                         202 
##        Onondaga-Bass Island                    Oriskany 
##                           2                         609 
##                      Oswego                       Penny 
##                           2                          82 
##       Penny & Fulmer Valley                     Potsdam 
##                         241                           5 
##                 Precambrian                   Queenston 
##                           1                         651 
##                      Retsof                 Rhinestreet 
##                           1                          21 
##                  Rice Brook       Rice Brook & Chipmunk 
##                          34                          12 
##                    Richburg     Richburg-Waugh & Porter 
##                        8426                          22 
##             Rochester Shale                      Salina 
##                           1                         378 
##                    Sauquoit                        Scio 
##                           1                         641 
##                 Sodus Shale                   Surficial 
##                           1                           3 
##                    Syracuse                     Theresa 
##                         109                          39 
##                     Trenton                       Tully 
##                         233                           4 
##                     Unknown              Upper Devonian 
##                          12                         494 
##        Upper Devonian Shale                       Utica 
##                           8                           5 
##                      Vernon         Wappinger Limestone 
##                          26                           2 
##              Waugh & Porter                   Whirlpool 
##                         249                          25 
##              Yonkers Gneiss 
##                           3

# Build the geological-formation frequency table as a data frame
# as.data.frame() on a table yields one row per formation, with the formation
# in `Formacion` and its count in `Freq` (defaults written out explicitly).
Tabla_Formacion <- as.data.frame(
  TDF_Formacion,
  responseName = "Freq",
  stringsAsFactors = TRUE
)
Tabla_Formacion
##                      Formacion  Freq
## 1                            - 10115
## 2                        Akron    39
## 3                  Bass Island   107
## 4            Beekmantown Group     2
## 5                       Bertie     2
## 6                  Black River   156
## 7                     Bradford  2015
## 8          Bradford & Chipmunk   372
## 9           Bradford 1st & 2nd     3
## 10     Bradford 1st & Chipmunk     8
## 11          Bradford 2nd & 3rd    54
## 12             Bradford Second    25
## 13              Bradford Third   598
## 14                    Camillus    10
## 15                    Chipmunk  2365
## 16     Chipmunk & Bradford 2nd   436
## 17     Chipmunk & Bradford 3rd    45
## 18       Chipmunk & Harrisburg     4
## 19 Chipmunk Bradford 1st & 2nd    82
## 20 Chipmunk Bradford 1st2nd3rd    76
## 21 Chipmunk Bradford 2nd & 3rd   625
## 22                 Clarksville   140
## 23                     Clinton     3
## 24                Confidential    31
## 25              Devonian Shale     6
## 26              Fordham Gneiss    26
## 27               Fulmer Valley  1956
## 28                       Glade   737
## 29                     Grimsby   307
## 30                    Hamilton    20
## 31                  Harrisburg     2
## 32                  Helderberg    26
## 33                    Herkimer    67
## 34             Herkimer-Oneida     5
## 35               Inwood Marble     6
## 36                       Laona     1
## 37                Little Falls     1
## 38                    Lockport    14
## 39                    Lorraine     1
## 40            Manhattan Schist    35
## 41                     Manlius     2
## 42                   Marcellus    50
## 43                      Medina  8843
## 44            Medina-Queenston     5
## 45            NA - Geoexchange     5
## 46                     Niagara    25
## 47                       Nunda     1
## 48                      Oneida    35
## 49               Oneida-Oswego    11
## 50                    Onondaga   202
## 51        Onondaga-Bass Island     2
## 52                    Oriskany   609
## 53                      Oswego     2
## 54                       Penny    82
## 55       Penny & Fulmer Valley   241
## 56                     Potsdam     5
## 57                 Precambrian     1
## 58                   Queenston   651
## 59                      Retsof     1
## 60                 Rhinestreet    21
## 61                  Rice Brook    34
## 62       Rice Brook & Chipmunk    12
## 63                    Richburg  8426
## 64     Richburg-Waugh & Porter    22
## 65             Rochester Shale     1
## 66                      Salina   378
## 67                    Sauquoit     1
## 68                        Scio   641
## 69                 Sodus Shale     1
## 70                   Surficial     3
## 71                    Syracuse   109
## 72                     Theresa    39
## 73                     Trenton   233
## 74                       Tully     4
## 75                     Unknown    12
## 76              Upper Devonian   494
## 77        Upper Devonian Shale     8
## 78                       Utica     5
## 79                      Vernon    26
## 80         Wappinger Limestone     2
## 81              Waugh & Porter   249
## 82                   Whirlpool    25
## 83              Yonkers Gneiss     3

# Group the formations into three size classes by their frequency (Freq):
#   fewer than 100 -> "Formaciones geologicas pequeñas"
#   100 up to 999  -> "Formaciones geologicas Mediana"
#   1000 or more   -> "formaciones Geologicas grandes"
# The classification is vectorised over the whole Freq column, so it works for
# any number of rows instead of assuming the table has exactly 83.
limites_grupo <- c(-Inf, 100, 1000, Inf)
etiquetas_grupo <- c(
  "Formaciones geologicas pequeñas",
  "Formaciones geologicas Mediana",
  "formaciones Geologicas grandes"
)
Tabla_Formacion$Grupo <- as.character(
  cut(
    Tabla_Formacion$Freq,
    breaks = limites_grupo,
    labels = etiquetas_grupo,
    right = FALSE # intervals are [lower, upper): 100 is Mediana, 1000 is grandes
  )
)

# Count how many formations fall into each size class and display the result.
Grupo <- table(Tabla_Formacion$Grupo)
Grupo
## 
##  formaciones Geologicas grandes  Formaciones geologicas Mediana 
##                               6                              19 
## Formaciones geologicas pequeñas 
##                              58
# Quick, untitled bar chart of the number of formations per size class.
barplot(height = Grupo)

# Relative frequency of each size class (share of the total count).
hi_grupo <- prop.table(Grupo)
# Sanity check: the shares must add up to 1.
sum(hi_grupo)
## [1] 1
hi_grupo
## 
##  formaciones Geologicas grandes  Formaciones geologicas Mediana 
##                      0.07228916                      0.22891566 
## Formaciones geologicas pequeñas 
##                      0.69879518
# Express the shares as percentages rounded to two decimals, then display them.
hi_grupo <- round(100 * hi_grupo, digits = 2)
hi_grupo
## 
##  formaciones Geologicas grandes  Formaciones geologicas Mediana 
##                            7.23                           22.89 
## Formaciones geologicas pequeñas 
##                           69.88

# Frequency distribution chart (GDF): local bar plot of the counts in Grupo
# The title is assembled with paste0() and an explicit "\n" so its second line
# does not pick up the source-code indentation as leading spaces.
barplot(
  Grupo,
  main = paste0(
    "Gráfica No. 3.1: Distribución de frecuencia de la formacion\n",
    "geologica en la que se encuentran los pozos de hidrocarburos"
  ),
  col = "brown",
  xlab = "Formacion geologica",
  ylab = "Cantidad"
)

# Frequency distribution chart (GDF): global bar plot of the percentages in
# hi_grupo, drawn on a fixed 0-100 y axis.
# The title is assembled with paste0() and an explicit "\n" so its second line
# does not pick up the source-code indentation as leading spaces.
barplot(
  hi_grupo,
  main = paste0(
    "Gráfica No. 3.2: Distribución de frecuencia de la formacion\n",
    "geologica en la que se encuentran los pozos de hidrocarburos"
  ),
  col = "brown",
  xlab = "Formacion Geologica",
  ylab = "Porcentaje(%)",
  ylim = c(0, 100)
)

# Pie chart (relative frequency in percent)
# Slice labels show the percentage of each size class.
etiqueta <- paste(hi_grupo, "%", sep = " ")

# One colour per size class, shared by the pie and its legend.
colores_grupo <- rainbow(length(hi_grupo))

# The title is assembled with paste0() and an explicit "\n" so its second line
# does not pick up the source-code indentation as leading spaces.
pie(
  hi_grupo,
  labels = etiqueta,
  clockwise = TRUE,
  main = paste0(
    "Gráfica No.3.3:Distribución de frecuencia de la formacion\n",
    "geologica en la que se encuentran los pozos de hidrocarburos"
  ),
  col = colores_grupo
)

# Legend entries are taken from names(hi_grupo), i.e. the same order in which
# pie() draws the slices, so every label matches its colour even when the
# locale sorts the mixed-case class names differently.
legend(
  "topright",
  legend = names(hi_grupo),
  cex = 0.5,
  pt.cex = 2,
  fill = colores_grupo,
  title = "Leyenda "
)