#EXAMEN UNIDAD 1

setwd("~/PYE VERANO")

library(readxl)
pozos <- read_excel("pozos.xlsx")
View(pozos)

#A) Ordene los datos de menor a mayor, indique valor mámimo/mínmo y 
#el rango total de datos.
ph <- pozos$PH
phord <- sort(ph)
phord
##   [1] 6.1 6.3 6.4 6.4 6.4 6.4 6.4 6.4 6.4 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5
##  [19] 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.5 6.6 6.6 6.6 6.6 6.6 6.6 6.6 6.6 6.6 6.6
##  [37] 6.6 6.6 6.6 6.6 6.6 6.6 6.6 6.6 6.6 6.6 6.6 6.6 6.6 6.7 6.7 6.7 6.7 6.7
##  [55] 6.7 6.7 6.7 6.7 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8
##  [73] 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8
##  [91] 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8
## [109] 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.8 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9
## [127] 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9
## [145] 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 6.9 7.0 7.0
## [163] 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0
## [181] 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0
## [199] 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0
## [217] 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0 7.0
## [235] 7.0 7.0 7.0 7.0 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1
## [253] 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.1 7.2 7.2
## [271] 7.2 7.2 7.2 7.2 7.2 7.2 7.2 7.2 7.2 7.2 7.3 7.3 7.3 7.3 7.3 7.3 7.4 7.4
## [289] 7.4 7.4 7.4 7.4 7.5
maxp <- (max(ph))
maxp
## [1] 7.5
minp <- (min(ph))
minp
## [1] 6.1
rangop <- (maxp-minp)
rangop
## [1] 1.4
temp <- pozos$TEMP
tempord <- sort(temp)
tempord
##   [1] 25.6 25.8 26.2 26.3 26.3 26.4 26.4 26.8 26.8 26.9 27.0 27.0 27.1 27.2 27.2
##  [16] 27.3 27.3 27.3 27.3 27.4 27.4 27.4 27.4 27.4 27.5 27.5 27.5 27.5 27.5 27.5
##  [31] 27.5 27.5 27.5 27.5 27.5 27.5 27.6 27.7 27.7 27.7 27.7 27.8 27.8 27.8 27.8
##  [46] 27.8 27.8 27.8 27.8 27.8 27.8 27.8 27.9 27.9 27.9 27.9 27.9 27.9 27.9 27.9
##  [61] 27.9 27.9 27.9 27.9 27.9 27.9 28.0 28.0 28.0 28.0 28.0 28.0 28.0 28.0 28.0
##  [76] 28.0 28.0 28.0 28.0 28.0 28.0 28.0 28.0 28.0 28.1 28.1 28.1 28.2 28.2 28.2
##  [91] 28.2 28.2 28.2 28.2 28.2 28.2 28.2 28.2 28.2 28.3 28.3 28.3 28.3 28.3 28.3
## [106] 28.3 28.4 28.4 28.4 28.4 28.4 28.4 28.4 28.5 28.5 28.5 28.5 28.5 28.5 28.5
## [121] 28.5 28.5 28.6 28.6 28.6 28.6 28.6 28.6 28.6 28.6 28.6 28.6 28.6 28.6 28.6
## [136] 28.6 28.6 28.6 28.6 28.6 28.6 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7 28.7
## [151] 28.7 28.7 28.7 28.7 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8 28.8
## [166] 28.8 28.9 28.9 28.9 28.9 28.9 28.9 28.9 28.9 28.9 28.9 28.9 28.9 28.9 28.9
## [181] 28.9 28.9 28.9 28.9 29.0 29.0 29.0 29.0 29.0 29.0 29.0 29.0 29.0 29.0 29.0
## [196] 29.0 29.0 29.0 29.1 29.1 29.1 29.1 29.1 29.1 29.1 29.1 29.1 29.1 29.1 29.2
## [211] 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.2 29.3 29.3
## [226] 29.3 29.3 29.4 29.4 29.4 29.4 29.4 29.4 29.4 29.4 29.4 29.4 29.4 29.5 29.5
## [241] 29.5 29.5 29.5 29.5 29.5 29.5 29.5 29.6 29.6 29.6 29.7 29.7 29.8 29.8 29.8
## [256] 29.8 29.8 29.8 29.9 29.9 29.9 29.9 30.0 30.0 30.0 30.0 30.0 30.0 30.1 30.1
## [271] 30.1 30.1 30.2 30.2 30.2 30.3 30.3 30.3 30.3 30.4 30.5 30.6 30.8 30.9 31.1
## [286] 31.1 31.1 31.2 31.4 31.5 31.7 31.9 32.1
maxt <- (max(temp))
maxt
## [1] 32.1
mint <- (min(temp))
mint
## [1] 25.6
rangot <- (maxt-mint)
rangot
## [1] 6.5
#B)Intervalos y ancho de clase Sturges
nip <-(nclass.Sturges(ph))
nip
## [1] 10
anchop <- (rangop/nip)
anchop
## [1] 0.14
nit <-(nclass.Sturges(temp))
nit
## [1] 10
anchot <- (rangot/nit)
anchot
## [1] 0.65
#C)tablas de frecuencias 
library(fdth)
## 
## Attaching package: 'fdth'
## The following objects are masked from 'package:stats':
## 
##     sd, var
dist <- fdt(ph, breaks = "Sturges")
dist
##   Class limits   f   rf rf(%)  cf  cf(%)
##  [6.039,6.193)   1 0.00  0.34   1   0.34
##  [6.193,6.346)   1 0.00  0.34   2   0.68
##    [6.346,6.5)   7 0.02  2.39   9   3.07
##    [6.5,6.653)  40 0.14 13.65  49  16.72
##  [6.653,6.807)  67 0.23 22.87 116  39.59
##  [6.807,6.961)  44 0.15 15.02 160  54.61
##  [6.961,7.114) 108 0.37 36.86 268  91.47
##  [7.114,7.268)  12 0.04  4.10 280  95.56
##  [7.268,7.421)  12 0.04  4.10 292  99.66
##  [7.421,7.575)   1 0.00  0.34 293 100.00
#Esta tabla refleja que la clase con mas frecuencia absoluta es la de 
#[6.961-7.114]. Los dos primeros y el último intervalo tienen la misma 
#frecuencia (1) y por los resultados de la tabla podemos ver que la mayoría
#de los datos se encuentra concentrado en el centro.
#La frecuencia acumulada va sumando las frecuencias
#absolutas y siempre debe dar como resultado el número total de datos
#en este caso, 293.

library(fdth)
distt <- fdt(temp, breaks = "Sturges")
distt
##     Class limits  f   rf rf(%)  cf  cf(%)
##  [25.344,26.052)  2 0.01  0.68   2   0.68
##  [26.052,26.759)  5 0.02  1.71   7   2.39
##  [26.759,27.467) 17 0.06  5.80  24   8.19
##  [27.467,28.175) 63 0.22 21.50  87  29.69
##  [28.175,28.883) 79 0.27 26.96 166  56.66
##   [28.883,29.59) 81 0.28 27.65 247  84.30
##   [29.59,30.298) 28 0.10  9.56 275  93.86
##  [30.298,31.006)  9 0.03  3.07 284  96.93
##  [31.006,31.713)  7 0.02  2.39 291  99.32
##  [31.713,32.421)  2 0.01  0.68 293 100.00
#En este caso pasa lo mismo, la mayoría de los datos están en el centro.

#D) Elabore un histograma y polígono de frecuencias y un histograma de 
#frecuencia acumulada

hist(ph, breaks = "Sturges")

plot(dist, type = "fp")

plot(dist, type="cfh")

hist(temp, breaks = "Sturges")

plot(distt, type = "fp")

plot(distt, type="cfh")

#E) media mediana y moda
mean(ph)
## [1] 6.890444
median(ph)
## [1] 6.9
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}
Mode(pozos$PH)
## [1] 7
mean(temp)
## [1] 28.69795
median(temp)
## [1] 28.7
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}
Mode(pozos$TEMP)
## [1] 28.6
#La media, mediana y moda son casi iguales, solo por unos decimales de diferencia
#lo que significa que la distribució es simétrica o casi simétrica.

#F) varianza y desviación estándar 
var(ph)
## [1] 0.04908645
sd(ph)
## [1] 0.2215546
var(temp)
## [1] 1.035407
sd(temp)
## [1] 1.017549
#En el ph el valor es muy cercano a 0, por lo que los datos son muy parecidos y no 
#están dispersos. En la temperatura es un poco mas grande pero sigue estando cerca de 0.
#Ni la varianza ni la desviación estándar pueden ser negativas, porque la 
#varianza viene de elevar datos al cuadrado y la desviación es la raíz de 
#la varianza, y no puede haber raices negativas.

#G) Elabore gráfico de caja y bigote
boxplot(ph)

boxplot(temp)

#H)Gráfica de dispersión de pH vs temperatura.
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

ggplot(data=pozos)+
  geom_point(mapping=aes(x=ph, y=temp))

#Las variables no tienen relación porque no cambia una en razón de la otra.