Introduccion a los Principios Estadisticos

9 de febrero de 2016

Distribuciones

En este documento se omite deliberadamente el uso de tildes y de otros caracteres especiales para evitar problemas de codificacion.

# One-population hypothesis test and normality assumptions
# Use the following data
# NOTE(review): this call only echoes the value 3 (see the printed result
# right below it); if the intent was to limit the number of printed digits,
# options(digits = 3) is presumably what was meant -- confirm before
# changing, since the outputs in this document are shown at default precision.
round(option=3)

## [1] 3

# Draw a reproducible sample of 100 values from a normal distribution with
# mean 25 and standard deviation 3.7, then display it rounded to integers.
set.seed(2016)
dfr <- rnorm(n = 100, mean = 25, sd = 3.7)
round(dfr, digits = 0)

##   [1] 22 29 25 26 15 24 22 22 26 26 23 24 30 22 31 26 22 29 22 22 23 23 24
##  [24] 27 26 21 23 30 29 21 21 25 30 19 25 30 22 22 23 19 25 27 24 23 28 24
##  [47] 20 33 28 19 24 22 16 21 26 19 31 20 24 24 28 27 23 23 27 30 24 21 24
##  [70] 28 29 26 18 25 25 28 26 28 25 24 27 27 27 22 19 32 23 27 24 26 30 27
##  [93] 21 28 24 23 26 24 17 30

# Kernel density estimate of the sample, drawn as a curve and then filled
# in by tracing the same (x, y) coordinates as a polygon.
d <- density(dfr)
plot(d)
polygon(x = d$x, y = d$y, col = "wheat")

Distribucion y Tamano de Muestras

# Frequency histogram of the sample.
hist(dfr, main = "Histograma de Frecuencias", col = 144)

# Check the assumptions of the tests: Shapiro-Wilk normality test.
shapiro.test(x = dfr)

## 
##  Shapiro-Wilk normality test
## 
## data:  dfr
## W = 0.99281, p-value = 0.8764

Valoracion Descriptiva

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   14.67   22.35   24.02   24.58   27.19   33.12

Prueba de Hipotesis

# Two-sided one-sample t-test of H0: the population mean equals 26.
t.test(x = dfr, mu = 26, alternative = "two.sided")

## 
##  One Sample t-test
## 
## data:  dfr
## t = -3.9195, df = 99, p-value = 0.0001635
## alternative hypothesis: true mean is not equal to 26
## 95 percent confidence interval:
##  23.86352 25.29964
## sample estimates:
## mean of x 
##  24.58158

# Two-sided Wilcoxon signed-rank test of H0: the location equals 26.
wilcox.test(x = dfr, mu = 26, alternative = "two.sided")

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  dfr
## V = 1468, p-value = 0.0002806
## alternative hypothesis: true location is not equal to 26

# Show a boxplot and a histogram of the sample side by side. The previous
# graphical parameters are saved and restored afterwards so that the
# two-panel layout does not leak into plots drawn later.
old_par <- par(mfrow = c(1, 2))
boxplot(dfr)
hist(dfr)
par(old_par)

# Generate a second sample: 100 draws from a normal distribution with
# mean 27 and standard deviation 7.
dfr2 <- rnorm(n = 100, mean = 27, sd = 7)
# Exercise: build a table of summary statistics.
# Include:
# mean, sd, max, min, median, quantiles, IQR,
# normality, Kolmogorov test.

# Two populations
# Paired t-test
# NOTE(review): dfr and dfr2 come from separate rnorm() draws, so the pairing
# here is presumably for demonstration purposes only -- confirm.
t.test(dfr, dfr2, paired = TRUE)

## 
##  Paired t-test
## 
## data:  dfr and dfr2
## t = -3.4477, df = 99, p-value = 0.0008317
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.073105 -1.097364
## sample estimates:
## mean of the differences 
##               -2.585234

Muestras independientes

# Independent samples: two-sample t-test without pairing (reported as a
# Welch test in the output).
t.test(dfr, dfr2, paired = FALSE)

## 
##  Welch Two Sample t-test
## 
## data:  dfr and dfr2
## t = -3.41, df = 152.75, p-value = 0.000831
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.082997 -1.087471
## sample estimates:
## mean of x mean of y 
##  24.58158  27.16681

Homogeneidad de Var

# F test comparing the variances of the two samples.
var.test(x = dfr, y = dfr2)

## 
##  F test to compare two variances
## 
## data:  dfr and dfr2
## F = 0.2951, num df = 99, denom df = 99, p-value = 4.025e-09
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1985531 0.4385821
## sample estimates:
## ratio of variances 
##          0.2950963

Suponiendo var desiguales

# Two-sample t-test without assuming equal variances (Welch).
t.test(dfr, dfr2, var.equal = FALSE)

## 
##  Welch Two Sample t-test
## 
## data:  dfr and dfr2
## t = -3.41, df = 152.75, p-value = 0.000831
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.082997 -1.087471
## sample estimates:
## mean of x mean of y 
##  24.58158  27.16681

Suponiendo var iguales

# Two-sample t-test assuming equal variances (pooled variance).
t.test(dfr, dfr2, var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  dfr and dfr2
## t = -3.41, df = 198, p-value = 0.0007871
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.080269 -1.090199
## sample estimates:
## mean of x mean of y 
##  24.58158  27.16681

Kolmogorov

# Two-sample Kolmogorov-Smirnov test comparing the two samples.
ks.test(x = dfr, y = dfr2)

## 
##  Two-sample Kolmogorov-Smirnov test
## 
## data:  dfr and dfr2
## D = 0.3, p-value = 0.0002468
## alternative hypothesis: two-sided

Chi-cuadrado

H0: Independiente / No asociacion / Son homogeneas / No interaccion

H1: Dependiente / Asociacion / No son homogeneas / Interaccion.

# Observed counts for three categories, tested for goodness of fit against
# equal expected frequencies.
frec <- c(15, 19, 22)
chisq.test(x = frec)

## 
##  Chi-squared test for given probabilities
## 
## data:  frec
## X-squared = 1.3214, df = 2, p-value = 0.5165

# 0.95 quantile of the chi-squared distribution with 2 degrees of freedom.
qchisq(p = 0.95, df = 2)

## [1] 5.991465

# Same goodness-of-fit test on the observed counts, shown again.
chisq.test(x = frec)

## 
##  Chi-squared test for given probabilities
## 
## data:  frec
## X-squared = 1.3214, df = 2, p-value = 0.5165

# Expected counts under the null hypothesis of the goodness-of-fit test.
chisq.test(x = frec)[["expected"]]

## [1] 18.66667 18.66667 18.66667

# Counts observed in two habitats, collected into a data frame with one
# column per habitat.
habitat1 <- c(3, 6, 8)
habitat2 <- c(3, 12, 5)
habt <- data.frame(habitat1 = habitat1, habitat2 = habitat2)
print(habt)

##   habitat1 habitat2
## 1        3        3
## 2        6       12
## 3        8        5

# Label the rows of the contingency table and display it.
row.names(habt) <- c("machos", "hembras", "no_sexados")
print(habt)

##            habitat1 habitat2
## machos            3        3
## hembras           6       12
## no_sexados        8        5

# Pearson chi-squared test on the habitat contingency table.
chisq.test(x = habt)

## Warning in chisq.test(habt): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  habt
## X-squared = 2.4653, df = 2, p-value = 0.2915

# Each cell expressed as a proportion of the grand total.
prop.table(x = habt)

##              habitat1   habitat2
## machos     0.08108108 0.08108108
## hembras    0.16216216 0.32432432
## no_sexados 0.21621622 0.13513514

# Fisher's exact test with a simulated p-value (2000 replicates, the default).
fisher.test(x = habt, simulate.p.value = TRUE, B = 2000)

## 
##  Fisher's Exact Test for Count Data with simulated p-value (based
##  on 2000 replicates)
## 
## data:  habt
## p-value = 0.3018
## alternative hypothesis: two.sided

# Mosaic plot of the habitat contingency table.
mosaicplot(habt, main = "Plot de mosaico", color = TRUE)

# Goodness-of-fit test of the observed counts against a 1:2:1 ratio.
chisq.test(c(28, 49, 27), p = c(1, 2, 1) / 4)

## 
##  Chi-squared test for given probabilities
## 
## data:  c(28, 49, 27)
## X-squared = 0.36538, df = 2, p-value = 0.833

# Store the 1:2:1 goodness-of-fit test so its components can be inspected,
# then print it.
pro <- chisq.test(c(28, 49, 27), p = c(1, 2, 1) / 4)
pro

## 
##  Chi-squared test for given probabilities
## 
## data:  c(28, 49, 27)
## X-squared = 0.36538, df = 2, p-value = 0.833

# Expected counts stored in the saved test result.
pro[["expected"]]

## [1] 26 52 26

Correlacion

# First six rows of the built-in Orange data set.
head(Orange, n = 6L)

##   Tree  age circumference
## 1    1  118            30
## 2    1  484            58
## 3    1  664            87
## 4    1 1004           115
## 5    1 1231           120
## 6    1 1372           142

# Two-sided Pearson correlation test between tree age and circumference.
cor.test(
  Orange$age, Orange$circumference,
  method = "pearson",
  alternative = "two.sided"
)

## 
##  Pearson's product-moment correlation
## 
## data:  Orange$age and Orange$circumference
## t = 12.9, df = 33, p-value = 1.932e-14
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8342364 0.9557955
## sample estimates:
##       cor 
## 0.9135189

# Two-sided Spearman rank correlation test between tree age and circumference.
cor.test(
  Orange$age, Orange$circumference,
  method = "spearman",
  alternative = "two.sided"
)

## Warning in cor.test.default(Orange$age, Orange$circumference, alternative =
## "two.sided", : Cannot compute exact p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  Orange$age and Orange$circumference
## S = 668.09, p-value = 6.712e-14
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9064294

# Pearson correlation matrix of age and circumference, using complete rows.
cor(
  Orange[, c("age", "circumference")],
  use = "complete.obs",
  method = "pearson"
)

##                     age circumference
## age           1.0000000     0.9135189
## circumference 0.9135189     1.0000000

# Scatter plot of circumference against age.
plot(x = Orange$age, y = Orange$circumference)

library(corrplot)
# Correlation matrix of the built-in mtcars data set, drawn twice with
# corrplot: first with circles, then with ellipses.
M <- cor(mtcars)

for (shape in c("circle", "ellipse")) {
  corrplot(M, method = shape)
}

Distribuciones

En este documento se omite deliberadamente el uso de tildes y de otros caracteres especiales para evitar problemas de codificacion.

Distribucion y Tamano de Muestras

Valoracion Descriptiva

Prueba de Hipotesis

Muestras independientes

Homogeneidad de Var

Suponiendo var desiguales

Suponiendo var iguales

Kolmogorov

Chi-cuadrado

H0: Independiente / No asociacion / Son homogeneas / No interaccion

H1: Dependiente / Asociacion / No son homogeneas / Interaccion.

Correlacion

Continua…