Importancia de la simulación en el contexto estadístico

Introducción Markdown

Campo de R -> “Ctrl+Alt+i”

set.seed(2023)
# Data generation: 24 draws from a uniform distribution on [4, 6]
diam <- runif(24, min = 4, max = 6)
print(diam)
##  [1] 4.933228 4.670382 4.325635 4.792240 4.060783 4.241770 4.852331 5.235716
##  [9] 4.526417 4.952648 5.724638 4.297757 4.360860 5.998547 5.683482 4.285298
## [17] 4.689697 5.779251 4.639697 5.418032 5.256303 4.876345 4.679792 5.866185

Campo de ecuación \(\LaTeX\)

\[t=\frac{\bar x - \mu}{s/\sqrt{n}}\]

La ecuación anterior representa un estadístico de prueba: el estadístico t de Student.
# Reseed the generator. Nothing in this chunk draws random numbers, so the
# seed only matters for the next random call made after it.
set.seed(2023)
# Round the diameters to 2 decimal places
diam <- round(diam, digits = 2)
# Two-level factor: 12 observations labelled "Ecuatorial", then 12 "Longitudinal"
orient <- gl(2, 12, length = 24, labels = c("Ecuatorial", "Longitudinal"))
df <- data.frame(orient = orient, diam = diam)
head(df)
##       orient diam
## 1 Ecuatorial 4.93
## 2 Ecuatorial 4.67
## 3 Ecuatorial 4.33
## 4 Ecuatorial 4.79
## 5 Ecuatorial 4.06
## 6 Ecuatorial 4.24
tail(df)
##          orient diam
## 19 Longitudinal 4.64
## 20 Longitudinal 5.42
## 21 Longitudinal 5.26
## 22 Longitudinal 4.88
## 23 Longitudinal 4.68
## 24 Longitudinal 5.87
# Exponential distribution: 24 draws with rate 1/4

diam2 <- rexp(n = 24, rate = 1 / 4)

# Horizontal boxplot of diam2 by orientation.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
boxplot(diam2 ~ orient, horizontal = TRUE, col = c("lightgreen", "lightblue"))

Resumen estadístico descriptivo

# Side-by-side comparison: uniform diameters (diam) first, exponential
# diameters (diam2) second, each split by orientation.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
boxplot(diam ~ orient, horizontal = TRUE, col = c("lightgreen", "lightblue"))

boxplot(diam2 ~ orient, horizontal = TRUE, col = c("lightgreen", "lightblue"))

Agregarle promedios a las cajas

# Means of diam by orientation
m1 <- tapply(diam, orient, mean)
m1
##   Ecuatorial Longitudinal 
##     4.717500     5.129167
# Means of diam2 by orientation
m2 <- tapply(diam2, orient, mean)
m2
##   Ecuatorial Longitudinal 
##     3.798262     3.315129
# Boxplot + means
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
boxplot(
  diam ~ orient,
  horizontal = TRUE,
  col = c("lightgreen", "lightblue"),
  xlab = "Diametro (cm)",
  ylab = "Orientación"
)
# Overlay each group's mean as a red dot on its box
points(y = 1:2, x = m1, pch = 16, col = "red", cex = 1.5)
# Individual observations as ticks: side 3 (top) for Ecuatorial and
# side 1 (bottom) for Longitudinal. A logical index is enough here; which()
# added nothing.
rug(diam[orient == "Ecuatorial"], lwd = 3, side = 3, col = "darkgreen")
rug(diam[orient == "Longitudinal"], lwd = 3, side = 1, col = "blue")

# Density
set.seed(1234)
# Data generation: 240 uniform diameters on [4, 6], 120 per orientation
diam <- runif(n = 240, min = 4, max = 6)
orient <- gl(
  n = 2, k = 120, length = 240,
  labels = c("ecuatorial", "longitudinal")
)
# Rebuild df from the new vectors. Without this, df still holds the 24 rounded
# observations built earlier, and every plot drawn from df would silently
# ignore the 240 values generated here.
df <- data.frame(orient, diam)
# Using ggplot2: install it only when it is missing, so that re-running the
# document does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
# Attach ggplot2
library(ggplot2)
# Density plot of diam, one filled curve per orientation
ggplot(data = df, mapping = aes(x = diam, fill = orient)) +
  geom_density(alpha = 0.4)

# Histogram of diam, filled by orientation
ggplot(data = df, mapping = aes(x = diam, fill = orient)) +
  geom_histogram(alpha = 0.4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot of diam, filled by orientation
ggplot(data = df, mapping = aes(x = diam, fill = orient)) +
  geom_boxplot(alpha = 0.4)

# Violin plot of diam, with orientation on the y axis
ggplot(data = df, mapping = aes(x = diam, y = orient, fill = orient)) +
  geom_violin(alpha = 0.4)

# One column per orientation: split() yields a named list with one vector per
# level of orient, which is then turned into a two-column data frame.
df2 <- as.data.frame(split(diam, orient))
# Scatter plot of the ecuatorial column against the longitudinal column
ggplot(data = df2, mapping = aes(x = ecuatorial, y = longitudinal)) +
  geom_point(size = 2)

Investigar: bihistograma (quiz): código y cómo interpretarlo.

Resumen descriptivo numérico

# Attach psych (this masks ggplot2's %+% and alpha, as the message below shows)
library("psych")
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Descriptive summary of all diameters
psych::describe(x = diam)
##    vars   n mean   sd median trimmed mad  min max range skew kurtosis   se
## X1    1 240 4.98 0.57   4.99    4.97 0.7 4.02   6  1.98 0.11    -1.14 0.04
# The same summary, computed separately for each orientation
psych::describeBy(x = diam, group = orient)
## 
##  Descriptive statistics by group 
## group: ecuatorial
##    vars   n mean   sd median trimmed mad  min  max range skew kurtosis   se
## X1    1 120 4.85 0.57   4.65    4.83 0.6 4.02 5.98  1.97 0.34    -1.18 0.05
## ------------------------------------------------------------ 
## group: longitudinal
##    vars   n mean   sd median trimmed  mad  min max range  skew kurtosis   se
## X1    1 120  5.1 0.53   5.13    5.11 0.65 4.04   6  1.95 -0.05    -0.96 0.05

Coeficiente de variación

\[\%~CV = \frac{s}{\bar{x}}\times 100\]

# Mi primera función

#' Coefficient of variation, as a percentage
#'
#' Computes `100 * sd(datos) / mean(datos)`.
#'
#' @param datos Numeric vector of observations.
#' @param na.rm Should missing values be dropped before computing the mean
#'   and the standard deviation? Defaults to `FALSE`.
#' @return The coefficient of variation in percent. `NA` when the mean or the
#'   standard deviation is missing (missing values kept, empty input, or a
#'   single observation). When both the mean and the standard deviation are
#'   zero (0/0), the string "Indeterminación" is printed and is the
#'   function's value.
fun_cv <- function(datos, na.rm = FALSE) {
  media <- mean(datos, na.rm = na.rm)
  desv <- sd(datos, na.rm = na.rm)
  # A missing mean or sd would make the `if` below fail with
  # "missing value where TRUE/FALSE needed"; report NA instead.
  if (is.na(media) || is.na(desv)) {
    return(NA_real_)
  }
  # `&&` is the scalar operator intended for `if` conditions
  if (media == 0 && desv == 0) {
    print("Indeterminación")
  } else {
    100 * desv / media
  }
}

# Evaluating the function on each orientation column of df2

cat("%CV ecuatorial", fun_cv(df2[["ecuatorial"]]))
## %CV ecuatorial 11.7698
cat("%CV longitudinal", fun_cv(df2[["longitudinal"]]))
## %CV longitudinal 10.44618
# Fifty zeros: mean and standard deviation are both 0, so the ratio is 0/0
datos <- numeric(50)
fun_cv(datos)
## [1] "Indeterminación"
# Validando inicialmente la funcion  

#' Coefficient of variation, as a percentage
#'
#' Computes `100 * sd(datos) / mean(datos)`.
#'
#' @param datos Numeric vector of observations.
#' @param na.rm Should missing values be dropped before computing the mean
#'   and the standard deviation? Defaults to `FALSE`.
#' @return The coefficient of variation in percent. `NA` when the mean or the
#'   standard deviation is missing (missing values kept, empty input, or a
#'   single observation). When both the mean and the standard deviation are
#'   zero (0/0), the string "Indeterminación" is printed and is the
#'   function's value.
fun_cv <- function(datos, na.rm = FALSE) {
  media <- mean(datos, na.rm = na.rm)
  desv <- sd(datos, na.rm = na.rm)
  # A missing mean or sd would make the `if` below fail with
  # "missing value where TRUE/FALSE needed"; report NA instead.
  if (is.na(media) || is.na(desv)) {
    return(NA_real_)
  }
  # `&&` is the scalar operator intended for `if` conditions
  if (media == 0 && desv == 0) {
    print("Indeterminación")
  } else {
    100 * desv / media
  }
}
# Re-run the all-zero vector against the redefined fun_cv
fun_cv(datos)
## [1] "Indeterminación"