Punto 1 - Simulación Resultado de la Suma del Lanzamiento de dos Dados
# Simulate one roll of two six-sided dice and return the sum of both faces.
# Takes no arguments; the result is an integer between 2 and 12.
sumdados <- function() {
  caras <- 1:6
  # Two separate single draws, first die first, so the random-number stream
  # is consumed in the same order as two independent rolls.
  primer_dado <- sample(caras, size = 1, replace = TRUE)
  segundo_dado <- sample(caras, size = 1, replace = TRUE)
  primer_dado + segundo_dado
}
# Example: simulate a single roll of the two dice and show the resulting sum.
sumdados()
## [1] 3
# Simulate `nlanza` rolls of two dice and count how many of them add up to
# `valorcondi`.
#
# Args:
#   nlanza: number of rolls to simulate (non-negative).
#   valorcondi: target sum to look for.
# Returns: the number of simulated rolls whose sum equals `valorcondi`.
simulamultlilanza=function(nlanza,valorcondi){
  lanzamientos=rep(NA_integer_,nlanza)
  # seq_len() gives an empty sequence when nlanza is 0; 1:nlanza would iterate
  # over c(1, 0) and record a roll that was never requested.
  for (i in seq_len(nlanza)){
    lanzamientos[i]=sumdados()
  }
  return(sum(lanzamientos==valorcondi))
}
# Example: out of 100 simulated rolls, count how many add up to 12.
simulamultlilanza(100,12)
## [1] 4
# Theoretical sample space for the sum of two dice: enumerate every ordered
# pair of faces and attach the sum of each pair.
dado1 <- 1:6
dado2 <- 1:6
# Number of faces of each die; their product is the size of the sample space.
esp_dado1 <- length(dado1)
esp_dado2 <- length(dado2)
# One row per (first die, second die) combination.
espacio <- expand.grid(dado1, dado2)
# Row-wise sum of the two faces.
y <- apply(X = espacio, MARGIN = 1, FUN = sum)
data.frame(espacio, y)
## Var1 Var2 y
## 1 1 1 2
## 2 2 1 3
## 3 3 1 4
## 4 4 1 5
## 5 5 1 6
## 6 6 1 7
## 7 1 2 3
## 8 2 2 4
## 9 3 2 5
## 10 4 2 6
## 11 5 2 7
## 12 6 2 8
## 13 1 3 4
## 14 2 3 5
## 15 3 3 6
## 16 4 3 7
## 17 5 3 8
## 18 6 3 9
## 19 1 4 5
## 20 2 4 6
## 21 3 4 7
## 22 4 4 8
## 23 5 4 9
## 24 6 4 10
## 25 1 5 6
## 26 2 5 7
## 27 3 5 8
## 28 4 5 9
## 29 5 5 10
## 30 6 5 11
## 31 1 6 7
## 32 2 6 8
## 33 3 6 9
## 34 4 6 10
## 35 5 6 11
## 36 6 6 12
# Theoretical probability of each possible sum: how many outcomes produce it,
# divided by the size of the sample space.
prob_teorica <- table(y) / (esp_dado1 * esp_dado2)
prob_teorica
## y
## 2 3 4 5 6 7 8
## 0.02777778 0.05555556 0.08333333 0.11111111 0.13888889 0.16666667 0.13888889
## 9 10 11 12
## 0.11111111 0.08333333 0.05555556 0.02777778
# Plot the probabilities against the sums 2 to 12 as connected points.
plot(x = 2:12, y = prob_teorica, type = "b")
Punto 2 - Simulación Concepto de Distribución Muestral (Caso proporciones)
# Build a two-valued population of size `n` in which a proportion `p` of the
# elements equals `max_val` and the remaining elements equal `min_val`.
#
# Args:
#   min_val: value given to the elements outside the proportion `p`.
#   max_val: value given to the proportion `p` of the elements.
#   n: population size.
#   p: proportion (between 0 and 1) of elements equal to `max_val`.
# Returns: a vector with the `max_val` elements first, then the `min_val` ones.
createpob=function(min_val,max_val,n,p){
  # rep() truncates a fractional count, and products such as 100 * 0.29
  # evaluate to 28.999999999999996, so computing both group sizes
  # independently can drop elements. Round one size and derive the other so
  # the two groups always add up to n.
  n_max=round(n*p)
  n_min=n-n_max
  pob=c(rep(x = max_val,n_max),rep(x = min_val,n_min))
  return(pob)
}
# Population of 1000 units in which 10% take the value 1 and the rest 0.
df_pob <- createpob(min_val = 0, max_val = 1, n = 1000, p = 0.10)
df_pob
## [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0
## [112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [223] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [371] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [408] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [445] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [482] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [519] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [556] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [593] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [630] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [667] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [704] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [741] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [778] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [815] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [852] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [889] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [926] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [963] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [1000] 0
# Draw a random sample of `tam_muestra` elements from `df` (without
# replacement) and return the sample total divided by the sample size.
muestrapob <- function(df, tam_muestra) {
  muestra <- sample(df, size = tam_muestra)
  total <- sum(muestra)
  total / tam_muestra
}
# Example: proportion of ones in a single sample of 200 units from df_pob.
muestrapob(df_pob,200)
## [1] 0.105
# Sample `n_muestra` units without replacement from a fixed population of
# 100 ones and 900 zeros, and return the proportion of ones in the sample.
calc_por_uno <- function(n_muestra) {
  unos <- rep(x = 1, 100)
  ceros <- rep(x = 0, 900)
  pob <- c(unos, ceros)
  seleccion <- sample(pob, size = n_muestra)
  sum(seleccion) / n_muestra
}
# Example: proportion of ones in one sample of 200 units.
calc_por_uno(n_muestra = 200)
## [1] 0.12
# Sampling distribution of the proportion: repeat the 200-unit sampling 1000
# times and keep the proportion obtained in each repetition.
porc_muestra <- vapply(rep(200, times = 1000), calc_por_uno, FUN.VALUE = numeric(1))
summary(porc_muestra)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0400 0.0850 0.1000 0.1001 0.1150 0.1600
# Histogram and boxplot of the simulated sample proportions.
hist(x = porc_muestra, col = "ivory", main = "Distribucion % muestra")
boxplot(x = porc_muestra, col = "ivory")
library(stringr)
library(modeest)
## Warning: package 'modeest' was built under R version 4.1.3
library(ggplot2)
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.2
## Registered S3 method overwritten by 'httr':
## method from
## print.response rmutil
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.1.2
Punto 3 - Función que Calcula Descriptivos Univariados a. Genere una función que calcule indicadores y gráficos descriptivos de una variable cuantitativa.
# Simulated population: age and ethnicity ("Blanca", "Negra" or "Mulata") of
# 1000 women, both drawn at random with replacement.
edad <- sample(x = 15:85, size = 1000, replace = TRUE)
etnia <- sample(x = c("Blanca", "Negra", "Mulata"), size = 1000, replace = TRUE)
df_simulado <- data.frame(edad = edad, etnia = as.character(etnia))
head(df_simulado)
## edad etnia
## 1 21 Blanca
## 2 18 Blanca
## 3 22 Mulata
## 4 34 Negra
## 5 40 Blanca
## 6 65 Blanca
a1. Indicadores variable cuantitativa.
# Compute univariate descriptive statistics for a quantitative column.
#
# Args:
#   datos: data frame (or list) that holds the variable.
#   name_var: name of the quantitative column, as a string.
# Returns: a data frame with two rows, "nombres" (statistic labels) and
#   "descriptivos" (their values, stored as text), one column per statistic.
# Side effect: prints a "Descriptivos-<name_var>" header.
# Uses mfv() and str_c(), which this file loads through modeest and stringr.
mtc_cuantitativa = function(datos,name_var) {
  valores = datos[[name_var]]
  # The count includes missing values; every other statistic ignores them.
  cantidad = length(valores)
  minimo = min(valores, na.rm = TRUE)
  q1 = quantile(valores, probs = 0.25, na.rm = TRUE)
  media = mean(valores, na.rm = TRUE)
  # Mean after trimming 2.5% of the observations from each end.
  media_rec = mean(valores, trim = 0.025, na.rm = TRUE)
  mediana = median(valores, na.rm = TRUE)
  # mfv() returns every most-frequent value, so tied modes yield several
  # numbers. They are collapsed into one text cell; otherwise the value row
  # would be longer than the label row and rbind() would misalign them.
  # Missing values are dropped first, matching the other statistics.
  modas = mfv(valores[!is.na(valores)])
  moda = paste(as.numeric(modas), collapse = ", ")
  varianza = var(valores, na.rm = TRUE)
  desvest = sd(valores, na.rm = TRUE)
  q3 = quantile(valores, probs = 0.75, na.rm = TRUE)
  maximo = max(valores, na.rm = TRUE)
  antes_moda = as.character(as.numeric(c(cantidad, minimo, q1, media, media_rec, mediana)))
  despues_moda = as.character(as.numeric(c(varianza, desvest, q3, maximo)))
  descriptivos = c(antes_moda, moda, despues_moda)
  nombres = c("Cantidad","Mínimo", "Q1", "Media", "Media recortada", "Mediana", "Moda","Varianza", "Desviación Estándar", "Q3", "Máximo")
  # The variable names passed to rbind() become the row names of the result.
  desc_variables = as.data.frame(rbind(nombres,descriptivos))
  print(str_c("Descriptivos", "-", name_var))
  return(desc_variables)
}
a2. Gráficos variable cuantitativa
# Draw a histogram (panel A) and a boxplot (panel B) of a quantitative column,
# side by side.
#
# Args:
#   datos3: data frame that holds the variable.
#   name_var: name of the quantitative column, as a string; also used as the
#     x-axis title of both panels.
# Returns: the combined figure produced by ggarrange().
graficar_cuantitativa=function(datos3,name_var){
  # .data[[name_var]] looks the column up inside the data given to ggplot();
  # indexing the data frame directly inside aes() makes ggplot2 warn that
  # the use of `datos3[[name_var]]` is discouraged.
  g1=ggplot(datos3,aes(x=.data[[name_var]]))+geom_histogram(bins=30)+ scale_x_continuous(name_var)+theme_bw()
  g3=ggplot(datos3,aes(x=.data[[name_var]]))+geom_boxplot()+ scale_x_continuous(name_var)+theme_bw()
  return(ggarrange(g1,g3,labels = c("A", "B"),ncol = 2, nrow = 1))
}
# Percentage distribution of the categories of a qualitative column.
# Prints a header line and returns a table whose entries add up to 100.
mtc_cualitativa <- function(datos, name_var) {
  encabezado <- str_c("Proporición de ", " - ", name_var," del conjunto de datos")
  print(encabezado)
  # The column is passed to table() as an expression, not as a named
  # variable, so the printed table carries no variable-name header.
  frecuencias <- table(datos[[name_var]])
  proporciones <- prop.table(frecuencias)
  proporciones * 100
}
b2. Gráficos variable cualitativa
# Draw a bar chart (panel A) and a pie chart (panel B) of a qualitative
# column, side by side.
#
# Args:
#   datos: data frame that holds the variable.
#   name_var: name of the qualitative column, as a string.
# Returns: the combined figure produced by ggarrange().
graficar_cualitativa=function(datos,name_var){
  # .data[[name_var]] looks the column up inside the data given to ggplot();
  # indexing the data frame directly inside aes() makes ggplot2 warn that
  # the use of `datos[[name_var]]` is discouraged.
  g1 =ggplot(datos, aes(.data[[name_var]]))+geom_bar()+ ggtitle(str_c("Cantidades de ", " - ", name_var))+xlab(name_var)+theme_bw()
  # A single stacked bar drawn in polar coordinates gives the pie chart; the
  # legend title is set explicitly so it shows the variable name rather than
  # the aesthetic expression.
  g2=ggplot(datos, aes(x="", fill=factor(.data[[name_var]]))) + geom_bar( width=1) +ylab(name_var)+ labs(fill=name_var)+ coord_polar("y", start=0)
  return(ggarrange(g1,g2,labels = c("A", "B"),ncol = 2, nrow = 1))
}
# Print the descriptive table and plots that correspond to the type of one
# column: numeric (integer or double) columns are treated as quantitative,
# character and factor columns as qualitative.
#
# Args:
#   datos: data frame that holds the variable.
#   name_var: name of the column to describe, as a string.
# Returns (invisibly): the plot object for supported types, or the string
#   "Tipo de dato no contemplado" for any other type.
calcular_resultados_descriptivos=function(datos,name_var){
  valores=datos[[name_var]]
  # Type predicates are used instead of switch(class(...)): class() can return
  # several values (e.g. ordered factors), which makes switch() fail, and
  # factor columns would otherwise fall through to the unsupported case.
  if (is.numeric(valores)) {
    resumir=mtc_cuantitativa
    graficar=graficar_cuantitativa
  } else if (is.character(valores) || is.factor(valores)) {
    resumir=mtc_cualitativa
    graficar=graficar_cualitativa
  } else {
    no_contemplado="Tipo de dato no contemplado"
    # Report the skipped column; a bare return value is discarded silently
    # when the function is called inside a loop.
    message(str_c(no_contemplado, ": ", name_var))
    return(invisible(no_contemplado))
  }
  print(str_c("Descriptivos variable ",name_var))
  print(resumir(datos,name_var))
  grafico=graficar(datos,name_var)
  print(grafico)
  invisible(grafico)
}
Exploración univariada de df_simulado, que contiene la edad de las mujeres y su etnia.
# Run the univariate exploration for every column of df_simulado.
variables=names(df_simulado)
# seq_along() gives an empty sequence when there are no columns, whereas
# 1:length(variables) would iterate over c(1, 0) and request a missing name.
for (i in seq_along(variables)){
  calcular_resultados_descriptivos(df_simulado,variables[i])
}
## [1] "Descriptivos variable edad"
## [1] "Descriptivos-edad"
## V1 V2 V3 V4 V5 V6 V7
## nombres Cantidad Mínimo Q1 Media Media recortada Mediana Moda
## descriptivos 1000 15 33 50.591 50.6126315789474 50 35
## V8 V9 V10 V11
## nombres Varianza Desviación Estándar Q3 Máximo
## descriptivos 425.629348348348 20.6307864209862 70 85
## Warning: Use of `datos3[[name_var]]` is discouraged. Use `.data[[name_var]]`
## instead.
## Warning: Use of `datos3[[name_var]]` is discouraged. Use `.data[[name_var]]`
## instead.
## [1] "Descriptivos variable etnia"
## [1] "Proporición de - etnia del conjunto de datos"
##
## Blanca Mulata Negra
## 34.0 33.3 32.7
## Warning: Use of `datos[[name_var]]` is discouraged. Use `.data[[name_var]]`
## instead.
## Warning: Use of `datos[[name_var]]` is discouraged. Use `.data[[name_var]]`
## instead.