### Clase 5 
### Introduccion a la probabilidad en R 
### Distribuciones de probabilidad 
# Simulated samples and their empirical distributions ----
# Fix the RNG seed so every run draws the same samples, and print numbers
# with 5 significant digits.
set.seed(2020)
options(digits = 5)
datos <- round(rnorm(50, 3, 0.4), 1) # 50 normal draws, rounded to 1 decimal
datos2 <- round(runif(50, 2, 4))     # 50 uniform draws, rounded to integers
tablaf <- table(datos)               # frequency table
# Proportion table: divide by the actual sample size instead of a
# hard-coded 50 so it stays correct if the sample size changes.
tablap <- tablaf / length(datos)
plot(tablaf)

plot(tablap)

# 10 x 5 grid: one point per observation (50 points), sized by its value.
grilla <- expand.grid(x = 1:10, y = 1:5)
# dev.new() opens a 7 x 7 inch device on any platform; windows() only
# exists on Windows builds of R.
dev.new(width = 7, height = 7)
plot(grilla, cex = datos, col = rainbow(7)[5],
     pch = 19)

# Proportions must add up to 1.
sum(tablap)
## [1] 1
hist(datos, probability = TRUE)

# Probability density function
### Histogram with the normal density on top ###
hist(datos, density = 20, breaks = 20, probability = TRUE,
     xlab = "x",
     main = "curva normal sobre el histograma")
# Density of the distribution the sample was drawn from: N(3, 0.4).
curve(dnorm(x, 3, 0.4), col = "darkblue",
      lwd = 2, add = TRUE, yaxt = "n")

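# Distribution functions: each distribution (norm, unif, ...) comes with
# four functions, identified by the prefix of the function name.
# p : cumulative probability, P(X <= q)  (e.g. pnorm)
# r : random number generation           (e.g. rnorm)
# q : quantile for a given probability   (e.g. qnorm)
# d : density                            (e.g. dnorm)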


################################################
## The birth weight of a calf is a random variable that follows a normal
## distribution with a mean of 40 kg and a standard deviation of 4 kg:
##   X ~ N(mu = 40, sigma = 4)
## What is the probability that a calf is born weighing more than 50 kg?
##   P(X > 50) = P(Z > (50 - mu) / sigma) = P(Z > 2.5) = 0.0062 = 0.62%
peso_media <- 40  # mu (kg)
peso_sd <- 4      # sigma (kg)
peso_umbral <- 50 # weight of interest (kg)

# Quantile: the weight that is exceeded with probability 0.0062 (about 50).
cuantil_cola <- qnorm(0.0062, mean = peso_media, sd = peso_sd,
                      lower.tail = FALSE)
cuantil_cola
## [1] 50.002
# Density: height of the normal curve at 50 kg. This is not a probability.
densidad_umbral <- dnorm(peso_umbral, mean = peso_media, sd = peso_sd)
densidad_umbral
## [1] 0.0043821
# Careful: the answer comes from pnorm (upper tail), not from the quantile
# function.
prob_sobre_umbral <- pnorm(peso_umbral, mean = peso_media, sd = peso_sd,
                           lower.tail = FALSE)
prob_sobre_umbral
## [1] 0.0062097
#install.packages("tigerstats")
library(tigerstats)
## Warning: package 'tigerstats' was built under R version 3.6.3
## Loading required package: abd
## Warning: package 'abd' was built under R version 3.6.3
## Loading required package: nlme
## Loading required package: lattice
## Loading required package: grid
## Loading required package: mosaic
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: ggformula
## Loading required package: ggplot2
## Loading required package: ggstance
## 
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_errorbarh, GeomErrorbarh
## 
## New to ggformula?  Try the tutorials: 
##  learnr::run_tutorial("introduction", package = "ggformula")
##  learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
## 
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
## 
##     mean
## The following object is masked from 'package:ggplot2':
## 
##     stat
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median,
##     prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
## Welcome to tigerstats!
## To learn more about this package, consult its website:
##  http://homerhanumat.github.io/tigerstats

# Same upper-tail question (more than 50 for mean 40, sd 4), asked through
# tigerstats' pnormGC with graph = TRUE.
pnormGC(
  50,
  region = "above",
  mean = 40,
  sd = 4,
  graph = TRUE
)

## [1] 0.0062097
## Uniform distribution
# What is the probability that the pH is greater than 5.75,
# for a pH that is uniform between 5 and 6?

# Simulated sample of 30 pH values, shown as a histogram with a red rug
# marking each individual observation.
v4 <- runif(n = 30, min = 5, max = 6)
hist(v4)
rug(v4, col = "red", lwd = 3)

# Exact upper-tail probability P(pH > 5.75), expressed as a percentage.
pu1 <- 100 * punif(q = 5.75, min = 5,
                   max = 6, lower.tail = FALSE)
# Show the answer; the value was previously computed but never displayed.
pu1

####################################################
## Introduction to simple linear regression analysis
# Reset the seed so this section is reproducible on its own.
set.seed(2020)
# Simulated response: 50 normal draws sorted in increasing order, so the
# response rises with the dose by construction.
rto <- sort(rnorm(50, 3, 0.5))
# Five dose levels (0, 2, 4, 6, 8) with 10 observations each.
dosis <- rep(seq(0, 8, 2), each = 10)
class(dosis)
## [1] "numeric"
plot(dosis, rto, pch = 19, xlim = c(0, 10),
     ylim = c(0, 10))
# Least-squares fit of the response on the dose.
mod1 <- lm(rto ~ dosis)
summary(mod1)
## 
## Call:
## lm(formula = rto ~ dosis)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8825 -0.0751  0.0444  0.1317  0.4551 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.3631     0.0603    39.2   <2e-16 ***
## dosis         0.1749     0.0123    14.2   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.246 on 48 degrees of freedom
## Multiple R-squared:  0.808,  Adjusted R-squared:  0.804 
## F-statistic:  202 on 1 and 48 DF,  p-value: <2e-16
# Overlay the fitted line taken from the model itself. The previous
# hard-coded coefficients (1.9347, 0.3705) did not match the estimates
# reported above (2.3631, 0.1749), so the drawn line was not the fit.
abline(mod1)