Objetivo General

. Aprender al manejo y análisis de datos con el Software libre R

Objetivos específicos

. Familiarizarse con el software R . Aprender a cómo manejar datos en R . Afrontar los tipos de análisis puedo aplicar a mis datos.

Introducción

Algunos simbolos y comandos últiles en R

Simbolo	Utilidad
<-	Direccionar y crear objetos y vectores
*	Multiplicación
/	División
+	Suma
-	Resta
sqrt	Raíz Cuadrada
~	Operador para generar función de modelos de regresión y Andevas
log	Función logaritmica
exp	Logaritmo exponencial

Creación de objetos

a<-c(5,3,4,8,7,5,6,5,5,4,1,5,3,8,9,7,4,8,4,7,4,5,4,5,8,7)

Resumenes estadístios

summary(a)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.000   5.000   5.423   7.000   9.000

#Promedio
mean(a)

## [1] 5.423077

#Desviación estándar
sd(a)

## [1] 1.942639

#Mediana
median(a)

## [1] 5

#Valor máximo
max(a)

## [1] 9

#Valor Mínimo
min(a)

## [1] 1

#Cantidad de datos de un vector
length(a)

## [1] 26

#Tabulación cruzada
table(a)

## a
## 1 3 4 5 6 7 8 9 
## 1 2 6 7 1 4 4 1

#Diagrama de cajas
boxplot(a)

#Histograma
hist(a)

#Frecuencia Relativa
prop.table(table(a))

## a
##          1          3          4          5          6          7 
## 0.03846154 0.07692308 0.23076923 0.26923077 0.03846154 0.15384615 
##          8          9 
## 0.15384615 0.03846154

Distribución de frecuencias

library(fdth)

## 
## Attaching package: 'fdth'

## The following objects are masked from 'package:stats':
## 
##     sd, var

set.seed(2016)
datos<-rnorm(100, mean=25, sd=4)
dist1<-fdt(datos)
dist1

##     Class limits  f   rf rf(%)  cf cf(%)
##  [13.696,16.248)  3 0.03     3   3     3
##    [16.248,18.8)  3 0.03     3   6     6
##    [18.8,21.353) 12 0.12    12  18    18
##  [21.353,23.905) 31 0.31    31  49    49
##  [23.905,26.458) 19 0.19    19  68    68
##   [26.458,29.01) 19 0.19    19  87    87
##   [29.01,31.562) 11 0.11    11  98    98
##  [31.562,34.115)  2 0.02     2 100   100

#Cualtiles
quantile(a)

##   0%  25%  50%  75% 100% 
##    1    4    5    7    9

#Percentiles
quantile(a, .60)

## 60% 
##   5

#?fdth
#Histograma mediante el paquete de "fdth"
plot(dist1, x.round=2,xlas=1, cex.lab=.6, cex.axis=.6, xlab="Límite de clases", ylab="Frecuencia")

#Polígono de frecuencias mediante el paquete de "fdth"
plot(dist1, x.round=2,xlas=1, cex.lab=.6, cex.axis=.6, xlab="Límite de clases", ylab="Frecuencia", type="cfp", col="red")

Test de hipótesis de una Población

t.test(a)

## 
##  One Sample t-test
## 
## data:  a
## t = 14.234, df = 25, p-value = 1.702e-13
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  4.638428 6.207726
## sample estimates:
## mean of x 
##  5.423077

Intérvalo de confianza

t.test(a, conf.level = .99)

## 
##  One Sample t-test
## 
## data:  a
## t = 14.234, df = 25, p-value = 1.702e-13
## alternative hypothesis: true mean is not equal to 0
## 99 percent confidence interval:
##  4.361112 6.485042
## sample estimates:
## mean of x 
##  5.423077

Test de hipótesis de una cola

t.test(a,mu=4, alternative = "l")

## 
##  One Sample t-test
## 
## data:  a
## t = 3.7353, df = 25, p-value = 0.9995
## alternative hypothesis: true mean is less than 4
## 95 percent confidence interval:
##      -Inf 6.073849
## sample estimates:
## mean of x 
##  5.423077

Para dos Poblaciones

placebo<-c(20,31,22,33,24,45,46,27,48,24,25,28,29,25)
droga<-  c(55,60,49,50,68,70,54,65,62,43,34,51,29,29)
length(placebo)

## [1] 14

length(droga)

## [1] 14

boxplot(placebo, droga, names = c("placebo", "droga"),col=c("whitesmoke","gray") )

Distribución Normal de datos

shapiro.test(placebo)

## 
##  Shapiro-Wilk normality test
## 
## data:  placebo
## W = 0.83741, p-value = 0.01505

shapiro.test(droga)

## 
##  Shapiro-Wilk normality test
## 
## data:  droga
## W = 0.93388, p-value = 0.3455

Homogeneidad de las varianzas para dos poblaciones

var.test(placebo,droga )

## 
##  F test to compare two variances
## 
## data:  placebo and droga
## F = 0.46366, num df = 13, denom df = 13, p-value = 0.1792
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1488468 1.4443274
## sample estimates:
## ratio of variances 
##          0.4636632

Test no Paramétricos

wilcox.test(placebo, droga)

## Warning in wilcox.test.default(placebo, droga): cannot compute exact p-
## value with ties

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  placebo and droga
## W = 17, p-value = 0.0002141
## alternative hypothesis: true location shift is not equal to 0

Aplicación del test de hipótesis

Test paramétricos de dos poblaciones

nofunamdores <- c(18,22,21,17,20,17,23,20,22,21)
fumadores <- c(16,20,14,21,20,18,13,15,17,21)

shapiro.test(nofunamdores)

## 
##  Shapiro-Wilk normality test
## 
## data:  nofunamdores
## W = 0.91199, p-value = 0.295

shapiro.test(fumadores)

## 
##  Shapiro-Wilk normality test
## 
## data:  fumadores
## W = 0.91941, p-value = 0.3521

#Test de Hipótesis
var.test(nofunamdores,fumadores )

## 
##  F test to compare two variances
## 
## data:  nofunamdores and fumadores
## F = 0.52102, num df = 9, denom df = 9, p-value = 0.3456
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1294138 2.0976199
## sample estimates:
## ratio of variances 
##          0.5210191

t.test(nofunamdores,fumadores, var.equal = T )

## 
##  Two Sample t-test
## 
## data:  nofunamdores and fumadores
## t = 2.2573, df = 18, p-value = 0.03665
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1801366 5.0198634
## sample estimates:
## mean of x mean of y 
##      20.1      17.5

Receso

Test de Correlación

Correlación entre (“volume”) y (“Height”)

data("trees")
head(trees)

##   Girth Height Volume
## 1   8.3     70   10.3
## 2   8.6     65   10.3
## 3   8.8     63   10.2
## 4  10.5     72   16.4
## 5  10.7     81   18.8
## 6  10.8     83   19.7

Exploración de datos numéricos

plot(trees)
library(car)

scatterplotMatrix(trees)

Comprobación de supuestos

shapiro.test(trees$Height) #Normal

## 
##  Shapiro-Wilk normality test
## 
## data:  trees$Height
## W = 0.96545, p-value = 0.4034

shapiro.test(trees$Volume) #Asímetrico

## 
##  Shapiro-Wilk normality test
## 
## data:  trees$Volume
## W = 0.88757, p-value = 0.003579

Aplicación de la correlación no paramétrica

cor.test(trees$Girth,trees$Volume, method = "p", exact = F)

## 
##  Pearson's product-moment correlation
## 
## data:  trees$Girth and trees$Volume
## t = 20.478, df = 29, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9322519 0.9841887
## sample estimates:
##       cor 
## 0.9671194

#Forma gráfica de la correlación
library(corrplot)

## Warning: package 'corrplot' was built under R version 3.3.1

corMat<-cor(trees, method = "p")
corrplot(corMat,  method="ellipse")

Aplicación práctica de correlación

library(car)
data("mtcars")
head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Selección de datos

mtcars2<-mtcars[,1:4]
head(mtcars2)

##                    mpg cyl disp  hp
## Mazda RX4         21.0   6  160 110
## Mazda RX4 Wag     21.0   6  160 110
## Datsun 710        22.8   4  108  93
## Hornet 4 Drive    21.4   6  258 110
## Hornet Sportabout 18.7   8  360 175
## Valiant           18.1   6  225 105

#Figuras gráficas de la coreelación entre variables
scatterplotMatrix(mtcars2)

Comprobación de supuestos

shapiro.test(mtcars2$mpg)

## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars2$mpg
## W = 0.94756, p-value = 0.1229

shapiro.test(mtcars2$cyl)

## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars2$cyl
## W = 0.75331, p-value = 6.058e-06

Correlación no paramétrica

corMat<-cor(mtcars2, method = "p")
corMat

##             mpg        cyl       disp         hp
## mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684
## cyl  -0.8521620  1.0000000  0.9020329  0.8324475
## disp -0.8475514  0.9020329  1.0000000  0.7909486
## hp   -0.7761684  0.8324475  0.7909486  1.0000000

corrplot(corMat,  method="ellipse")

Análisis de Varianza

data(InsectSprays)
attach(InsectSprays)
#Permite ver las caracteristicas de las variables
str(InsectSprays)

## 'data.frame':    72 obs. of  2 variables:
##  $ count: num  10 7 20 14 14 12 10 23 17 20 ...
##  $ spray: Factor w/ 6 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...

model_Insect<-aov(count ~ spray,data=InsectSprays)
summary(model_Insect)

##             Df Sum Sq Mean Sq F value Pr(>F)    
## spray        5   2669   533.8    34.7 <2e-16 ***
## Residuals   66   1015    15.4                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Medidas descriptivas utiles para los ANOVAS

tapply(count, spray, mean) #Permite ver la media de los factores (Tratamientos)

##         A         B         C         D         E         F 
## 14.500000 15.333333  2.083333  4.916667  3.500000 16.666667

tapply(count, spray, sd) #Permite ver la desviacion estandar de los factores.

##        A        B        C        D        E        F 
## 4.719399 4.271115 1.975225 2.503028 1.732051 6.213378

tapply(count, spray, length) #Permite ver la cantidad de datos

##  A  B  C  D  E  F 
## 12 12 12 12 12 12

model.tables(model_Insect)#Permite medir el efecto de los tratamientos

## Tables of effects
## 
##  spray 
## spray
##      A      B      C      D      E      F 
##  5.000  5.833 -7.417 -4.583 -6.000  7.167

model.tables(model_Insect, type= "means")#Permite ver las medias de los tratamientos

## Tables of means
## Grand mean
##     
## 9.5 
## 
##  spray 
## spray
##      A      B      C      D      E      F 
## 14.500 15.333  2.083  4.917  3.500 16.667

boxplot(count ~ spray, data=InsectSprays, col="16", xlab="Tipos de Spray", ylab="Cantidad de Insectos")

#Editando la Figura
library(ggplot2)
ggplot(InsectSprays,aes(spray, count, colour = spray ))+geom_point() + geom_boxplot()

model_Insect<-aov(count~spray, data=InsectSprays)
summary(model_Insect)

##             Df Sum Sq Mean Sq F value Pr(>F)    
## spray        5   2669   533.8    34.7 <2e-16 ***
## Residuals   66   1015    15.4                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Comparaciones planeadas: Cuando tenemos sospecha de un posible resultado del análisis, es decir que entre grupos esperamos encontrar diferencias.

Ejm: cuando en un experimento hay un grupo control, a otro se le aplica un medicamento y otro grupo se le suministra mayor cantidad de un medicamento experimental. Posiblemente sospechemos que vamos a tener resultados diferentes (esto es lo que se entiende en el diseño de muestreo como un Experimento Planeado. Esto por tal debe se tratarse con la prueba de Fisher, en caso de los residuos cumplir con los supuyestos paramétricos.

Contrastes no planeados (post-hoc) Aquí hay múltiples técnicas, y lo que intenta es reducir posibilidad de errores tipo I (no acepta la hipótesis nula siendo esta verdadera), a costa de aumentar el error tipo II. Dicho de otro modo, en situaciones donde haya realmente diferencias entre grupos, las pruebas post-hoc no lo detectan. Por lo que se reconocen como los métodos más seguros, dado que detectan las verdaderas diferencias significativas.

Post Hoc test (Paramétrico)

#Test HSD (Honestly-significant-difference)
TukeyHSD(model_Insect)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = count ~ spray, data = InsectSprays)
## 
## $spray
##            diff        lwr       upr     p adj
## B-A   0.8333333  -3.866075  5.532742 0.9951810
## C-A -12.4166667 -17.116075 -7.717258 0.0000000
## D-A  -9.5833333 -14.282742 -4.883925 0.0000014
## E-A -11.0000000 -15.699409 -6.300591 0.0000000
## F-A   2.1666667  -2.532742  6.866075 0.7542147
## C-B -13.2500000 -17.949409 -8.550591 0.0000000
## D-B -10.4166667 -15.116075 -5.717258 0.0000002
## E-B -11.8333333 -16.532742 -7.133925 0.0000000
## F-B   1.3333333  -3.366075  6.032742 0.9603075
## D-C   2.8333333  -1.866075  7.532742 0.4920707
## E-C   1.4166667  -3.282742  6.116075 0.9488669
## F-C  14.5833333   9.883925 19.282742 0.0000000
## E-D  -1.4166667  -6.116075  3.282742 0.9488669
## F-D  11.7500000   7.050591 16.449409 0.0000000
## F-E  13.1666667   8.467258 17.866075 0.0000000

#Forma Gráfica del HSD
plot(TukeyHSD(model_Insect),  cex.axis=.7)

Comprobación de los supuestos del andeva

par(mfrow=c(2,2)) #Particiona mi ventana grafica
plot(model_Insect)

Comprobación estadística de los residuos

#Normalidad de los residuos
shapiro.test(residuals(model_Insect))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_Insect)
## W = 0.96006, p-value = 0.02226

#Homogeneidad de varianzas
library(car)
levene.test(model_Insect)

## Warning: 'levene.test' is deprecated.
## Use 'leveneTest' instead.
## See help("Deprecated") and help("car-deprecated").

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value   Pr(>F)   
## group  5  3.8214 0.004223 **
##       66                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Dado que no cumplen con los supuestos paramétricos, la opción es aplicar es un test no paramétrico.

Test no paramétrico del ANDEVA

kruskal.test(count~spray, data=InsectSprays)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  count by spray
## Kruskal-Wallis chi-squared = 54.691, df = 5, p-value = 1.511e-10

No homogeneidad, ni Distribución Normal de los residuos

Que hacer?

#Comparaciones no planeadas "Dunn's-test" y "bonferroni"
library(PMCMR)
posthoc.kruskal.dunn.test(x=count, g=spray, p.adjust.method="none")

## Warning in posthoc.kruskal.dunn.test.default(x = count, g = spray,
## p.adjust.method = "none"): Ties are present. z-quantiles were corrected for
## ties.

## 
##  Pairwise comparisons using Dunn's-test for multiple 
##                          comparisons of independent samples 
## 
## data:  count and spray 
## 
##   A       B       C       D       E      
## B 0.75448 -       -       -       -      
## C 1.8e-06 3.6e-07 -       -       -      
## D 0.00182 0.00060 0.09762 -       -      
## E 0.00012 3.1e-05 0.35572 0.46358 -      
## F 0.68505 0.92603 2.2e-07 0.00043 2.1e-05
## 
## P value adjustment method: none

posthoc.kruskal.dunn.test(x=count, g=spray, p.adjust.method="bonferroni")

## Warning in posthoc.kruskal.dunn.test.default(x = count, g = spray,
## p.adjust.method = "bonferroni"): Ties are present. z-quantiles were
## corrected for ties.

## 
##  Pairwise comparisons using Dunn's-test for multiple 
##                          comparisons of independent samples 
## 
## data:  count and spray 
## 
##   A       B       C       D       E      
## B 1.00000 -       -       -       -      
## C 2.7e-05 5.5e-06 -       -       -      
## D 0.02735 0.00904 1.00000 -       -      
## E 0.00177 0.00047 1.00000 1.00000 -      
## F 1.00000 1.00000 3.3e-06 0.00640 0.00031
## 
## P value adjustment method: bonferroni

Aplicación práctica del ANDEVA

library(car)
data(Baumann)
head(Baumann)

##   group pretest.1 pretest.2 post.test.1 post.test.2 post.test.3
## 1 Basal         4         3           5           4          41
## 2 Basal         6         5           9           5          41
## 3 Basal         9         4           5           3          43
## 4 Basal        12         6           8           5          46
## 5 Basal        16         5          10           9          46
## 6 Basal        15        13           9           8          45

str(Baumann)

## 'data.frame':    66 obs. of  6 variables:
##  $ group      : Factor w/ 3 levels "Basal","DRTA",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ pretest.1  : int  4 6 9 12 16 15 14 12 12 8 ...
##  $ pretest.2  : int  3 5 4 6 5 13 8 7 3 8 ...
##  $ post.test.1: int  5 9 5 8 10 9 12 5 8 7 ...
##  $ post.test.2: int  4 5 3 5 9 8 5 5 7 7 ...
##  $ post.test.3: int  41 41 43 46 46 45 45 32 33 39 ...

attach(Baumann)
is.factor(group)

## [1] TRUE

is.numeric(pretest.1)

## [1] TRUE

attach(Baumann)

## The following objects are masked from Baumann (pos = 3):
## 
##     group, post.test.1, post.test.2, post.test.3, pretest.1,
##     pretest.2

model1<-aov(post.test.1~group,data=Baumann)
summary(model1)

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## group        2  108.1   54.06   5.317 0.00735 **
## Residuals   63  640.5   10.17                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Supuesto del Modelo

par(mfrow=c(2,2))
plot(model1)

#Normalidad de los residuos
shapiro.test(residuals(model1))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model1)
## W = 0.97734, p-value = 0.2682

Homogeneidad de Varianza

leveneTest(post.test.1~group,data=Baumann)

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  2.1297 0.1273
##       63

bartlett.test(post.test.1~group,data=Baumann)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  post.test.1 by group
## Bartlett's K-squared = 3.7349, df = 2, p-value = 0.1545

Post-Hoc

TukeyHSD(model1)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = post.test.1 ~ group, data = Baumann)
## 
## $group
##                  diff        lwr       upr     p adj
## DRTA-Basal   3.090909  0.7832953 5.3985228 0.0057599
## Strat-Basal  1.090909 -1.2167047 3.3985228 0.4965098
## Strat-DRTA  -2.000000 -4.3076137 0.3076137 0.1021195

plot(TukeyHSD(model1))

boxplot(post.test.1~group,data=Baumann)

library(ggplot2)
ggplot(Baumann, aes(x=factor(group), y=post.test.1, fill=group)) + geom_boxplot()

Regresiones

data("trees")
head(trees)

##   Girth Height Volume
## 1   8.3     70   10.3
## 2   8.6     65   10.3
## 3   8.8     63   10.2
## 4  10.5     72   16.4
## 5  10.7     81   18.8
## 6  10.8     83   19.7

pairs(trees,pch=16, panel = panel.smooth)

Modelo de Regresión

reg1 <- lm(Volume ~ Height + Girth, data=trees)
summary(reg1)

## 
## Call:
## lm(formula = Volume ~ Height + Girth, data = trees)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.4065 -2.6493 -0.2876  2.2003  8.4847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -57.9877     8.6382  -6.713 2.75e-07 ***
## Height        0.3393     0.1302   2.607   0.0145 *  
## Girth         4.7082     0.2643  17.816  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.882 on 28 degrees of freedom
## Multiple R-squared:  0.948,  Adjusted R-squared:  0.9442 
## F-statistic:   255 on 2 and 28 DF,  p-value: < 2.2e-16

Revisamos los supuestos

par(mfrow=c(2,2))
plot(reg1)

#Parece haber ciertos problemas en el cumplimiento de los supuestos!

reg2 <- lm(log(Volume) ~ log(Height) + log(Girth), data=trees)
summary(reg2)

## 
## Call:
## lm(formula = log(Volume) ~ log(Height) + log(Girth), data = trees)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.168561 -0.048488  0.002431  0.063637  0.129223 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.63162    0.79979  -8.292 5.06e-09 ***
## log(Height)  1.11712    0.20444   5.464 7.81e-06 ***
## log(Girth)   1.98265    0.07501  26.432  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08139 on 28 degrees of freedom
## Multiple R-squared:  0.9777, Adjusted R-squared:  0.9761 
## F-statistic: 613.2 on 2 and 28 DF,  p-value: < 2.2e-16

par(mfrow=c(2,2))
plot(reg2)

Aplicación gráfica de la regresión

library(visreg)

## Warning: package 'visreg' was built under R version 3.3.1

par(mfrow=c(2,2))
visreg(reg2)

library(ggplot2)#ggplot2 es una extension poderosa para graficar
ggplot(trees, aes(x=log(Height), y=log(Volume))) +
geom_point(shape=1) +    # genera circulos en el grafico
geom_smooth(method=lm)

ggplot(trees, aes(x=log(Girth), y=log(Volume))) +
geom_point() +
geom_smooth(span = 0.3)

## `geom_smooth()` using method = 'loess'

Valores influyentes del modelo

library(car)
influencePlot(reg1, id.n = 3)

##      StudRes       Hat     CookD
## 2   1.651668 0.1472096 0.1478463
## 3   1.567740 0.1768619 0.1673192
## 18 -1.859901 0.1434615 0.1775359
## 20 -1.105744 0.2112366 0.1082855
## 31  2.765603 0.2270585 0.6052326

Identificamos valores extremos en el modelo

outlierTest(reg1)

## 
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
##    rstudent unadjusted p-value Bonferonni p
## 31 2.765603           0.010122      0.31377

outlierTest(reg2)

## 
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
##    rstudent unadjusted p-value Bonferonni p
## 18 -2.32572           0.027789      0.86146

Extraemos el valor extremos y probamos un nuevo modelo

trees1<-trees[-c(2,3,18,20,31),]
trees1

##    Girth Height Volume
## 1    8.3     70   10.3
## 4   10.5     72   16.4
## 5   10.7     81   18.8
## 6   10.8     83   19.7
## 7   11.0     66   15.6
## 8   11.0     75   18.2
## 9   11.1     80   22.6
## 10  11.2     75   19.9
## 11  11.3     79   24.2
## 12  11.4     76   21.0
## 13  11.4     76   21.4
## 14  11.7     69   21.3
## 15  12.0     75   19.1
## 16  12.9     74   22.2
## 17  12.9     85   33.8
## 19  13.7     71   25.7
## 21  14.0     78   34.5
## 22  14.2     80   31.7
## 23  14.5     74   36.3
## 24  16.0     72   38.3
## 25  16.3     77   42.6
## 26  17.3     81   55.4
## 27  17.5     82   55.7
## 28  17.9     80   58.3
## 29  18.0     80   51.5
## 30  18.0     80   51.0

reg3 <- lm(log(Volume) ~ log(Height) + log(Girth), data=trees1)
summary(reg3)

## 
## Call:
## lm(formula = log(Volume) ~ log(Height) + log(Girth), data = trees1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.168557 -0.064486  0.008656  0.062461  0.119170 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.92713    1.14349  -6.932 4.57e-07 ***
## log(Height)  1.42237    0.27875   5.103 3.62e-05 ***
## log(Girth)   1.97364    0.08288  23.815  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08015 on 23 degrees of freedom
## Multiple R-squared:  0.9723, Adjusted R-squared:  0.9699 
## F-statistic: 404.2 on 2 and 23 DF,  p-value: < 2.2e-16

par(mfrow=c(2,2))
plot(reg3)

#Criterio de selección del modelo (Akaike information criterion)

AIC(reg2,reg3)

## Warning in AIC.default(reg2, reg3): models are not all fitted to the same
## number of observations

##      df       AIC
## reg2  4 -62.71125
## reg3  4 -52.64131

Análisis de Biodoversidad

library(BiodiversityR)

## Loading required package: tcltk

## Loading required package: vegan

## Loading required package: permute

## Loading required package: lattice

## This is vegan 2.4-1

## BiodiversityR 2.7-2: use function 'BiodiversityRGUI()' to launch the BiodiversityR Graphical User Interface

BiodiversityRGUI()

## Loading required namespace: dismo

## La interfaz R-Commander sólo funciona en sesiones interactivas

## 
## Attaching package: 'Rcmdr'

## The following objects are masked from 'package:tcltk':
## 
##     tclvalue, tkfocus

## Fuente: BiodiversityGUI.R

beetles <- read.delim ('http://www.davidzeleny.net/anadat-r/data-download/carabid-beetles-boreal-forest.txt', row.names = 1)

#Permite transponer datos
t(beetles)->beetles2
head(beetles2)

##                        Calathus.micropterus Pterostichus.oblongopunctatus
## moist.primeval.forests                  203                           101
## dry.primeval.forests                     91                            18
## managed.forests                         161                            54
## old.plantations                          29                             9
## young.plantations                        48                            23
##                        Notiophilus.biguttatus Carabus.hortensis
## moist.primeval.forests                     73                68
## dry.primeval.forests                       29                 6
## managed.forests                            37                43
## old.plantations                             1                 0
## young.plantations                           3                 2
##                        Carabus.glabratus Cychrus.caraboides Amara.brunnea
## moist.primeval.forests                22                 41            30
## dry.primeval.forests                   9                  8            16
## managed.forests                       22                 16             9
## old.plantations                        1                  1             0
## young.plantations                     15                  6             2
##                        Trechus.secalis Leistus.terminatus Agonum.fuliginosum
## moist.primeval.forests              11                 31                  0
## dry.primeval.forests                11                  4                  0
## managed.forests                      4                  8                  2
## old.plantations                     15                  1                  0
## young.plantations                   30                  3                  0
##                        Agonum.mannerheimii Amara.familiaris Amara.lunicollis
## moist.primeval.forests                   7                0                0
## dry.primeval.forests                     0                0                0
## managed.forests                          1                0                0
## old.plantations                          0                0                0
## young.plantations                        0                1                7
##                        Bembidion.gilvipes Bradycellus.caucasicus
## moist.primeval.forests                  0                      0
## dry.primeval.forests                    0                      0
## managed.forests                         0                      0
## old.plantations                         0                      0
## young.plantations                       2                      1
##                        Calathus.melanocephalus Carabus.nitens Carabus.violaceus
## moist.primeval.forests                       0              0                 0
## dry.primeval.forests                         0              0                 0
## managed.forests                              0              1                 0
## old.plantations                              0              0                 0
## young.plantations                            3              1                 1
##                        Cicindela.sylvatica Cymindis.vaporariorum Dromius.agilis
## moist.primeval.forests                   0                     0              0
## dry.primeval.forests                     0                     0              0
## managed.forests                          0                     0              1
## old.plantations                          0                     0              0
## young.plantations                       10                     3              0
##                        Dyschirius.globosus Harpalus.quadripunctatus Harpalus.sp
## moist.primeval.forests                   1                        1           0
## dry.primeval.forests                     0                        3           0
## managed.forests                          0                        0           0
## old.plantations                          0                        0           0
## young.plantations                        0                        7           1
##                        Leistus.ferrugineus Loricera.pilicornis
## moist.primeval.forests                   0                   0
## dry.primeval.forests                     1                   0
## managed.forests                          0                   8
## old.plantations                          0                   0
## young.plantations                        1                   0
##                        Miscodera.arctica Notiophilus.aestuans
## moist.primeval.forests                 0                    0
## dry.primeval.forests                   0                    0
## managed.forests                        1                    0
## old.plantations                        0                    0
## young.plantations                     13                    2
##                        Notiophilus.germinyi Notiophilus.palustris
## moist.primeval.forests                    1                     0
## dry.primeval.forests                      0                     1
## managed.forests                           0                     1
## old.plantations                           0                     0
## young.plantations                         9                     9
##                        Notiophilus.reitteri Patrobus.assimilis
## moist.primeval.forests                    5                  0
## dry.primeval.forests                      0                  0
## managed.forests                           7                  1
## old.plantations                           0                  0
## young.plantations                         0                  0
##                        Patrobus.atrorufus Pterostichus.adstrictus
## moist.primeval.forests                  1                       0
## dry.primeval.forests                    0                       0
## managed.forests                         0                       0
## old.plantations                         0                       0
## young.plantations                       0                      23
##                        Pterostichus.cupreus Pterostichus.diligens
## moist.primeval.forests                    0                     0
## dry.primeval.forests                      2                     0
## managed.forests                           0                     1
## old.plantations                           0                     2
## young.plantations                         1                     1
##                        Pterostichus.niger Pterostichus.strenuus
## moist.primeval.forests                  1                     0
## dry.primeval.forests                    0                     0
## managed.forests                         3                     0
## old.plantations                         0                     4
## young.plantations                       7                     4
##                        Synuchus.vivalis
## moist.primeval.forests                0
## dry.primeval.forests                  0
## managed.forests                       0
## old.plantations                       0
## young.plantations                     4

#Permite obtener número de espececies por sitio (Riqueza)
specnumber(beetles2)

## moist.primeval.forests   dry.primeval.forests        managed.forests 
##                     16                     13                     20 
##        old.plantations      young.plantations 
##                      9                     31

#Permite obtener la suma de las abundancias por sitio
apply(beetles2,1,sum)

## moist.primeval.forests   dry.primeval.forests        managed.forests 
##                    597                    199                    381 
##        old.plantations      young.plantations 
##                     63                    243

#base ln (logaritmo neperiano)
H<-diversity(beetles2, index="shannon")  
H

## moist.primeval.forests   dry.primeval.forests        managed.forests 
##               2.000573               1.834545               1.958269 
##        old.plantations      young.plantations 
##               1.524423               2.817914

#Indice de  simpson (Dominancia)
simp <- diversity(beetles2, "simpson")
 #reciproco de Simpson (Dominancia)
invsimp <- diversity(beetles2, "inv")   
invsimp #Aumenta cuando la comunidad es menos equitativa

## moist.primeval.forests   dry.primeval.forests        managed.forests 
##               5.433975               3.946288               4.386987 
##        old.plantations      young.plantations 
##               3.389411              11.370884

#Pielou´s
J <- H/log(specnumber(beetles2))  
J# Pielou´s (valores>son equitativo, uniformes en las abundancias, valores< mayor dominancia, alta heterogeneidad).

## moist.primeval.forests   dry.primeval.forests        managed.forests 
##              0.7215542              0.7152364              0.6536864 
##        old.plantations      young.plantations 
##              0.6937949              0.8205954

#Permite estimar el número de especies no observadas.
specpool(beetles2)

##     Species chao  chao.se jack1 jack1.se jack2     boot  boot.se n
## All      39 55.2 10.35978  53.4 11.32784  60.6 45.56192 6.477809 5

#Curva de Acumulación
acr<-specaccum(beetles2, "rarefaction")
acr

## Species Accumulation Curve
## Accumulation method: rarefaction
## Call: specaccum(comm = beetles2, method = "rarefaction") 
## 
##                                                      
## Sites         1.0013   1.9993   3.0007    3.9987    5
## Individuals 297.0000 593.0000 890.0000 1186.0000 1483
## Richness     25.7627  31.3896  34.7565   37.1708   39
## sd            2.1465   1.9601   1.6713    1.2204    0

plot(acr, xlab = "# of samples", ylab = "# of species")

plot(acr,ci.type="poly", col="blue", lwd=2, ci.lty=0, ci.col="lightblue")

#Curva de Acumulación por sitios
rarecurve (beetles2, col="blue", cex = 0.6)

RankAbun <- rankabundance(beetles2)
RankAbun

##                               rank abundance proportion plower pupper accumfreq
## Calathus.micropterus             1       532       35.9   25.0   46.7      35.9
## Pterostichus.oblongopunctatus    2       205       13.8    8.9   18.7      49.7
## Notiophilus.biguttatus           3       143        9.6    3.8   15.5      59.3
## Carabus.hortensis                4       119        8.0    1.4   14.6      67.4
## Cychrus.caraboides               5        72        4.9    2.0    7.7      72.2
## Trechus.secalis                  6        71        4.8   -1.8   11.4      77.0
## Carabus.glabratus                7        69        4.7    2.9    6.4      81.7
## Amara.brunnea                    8        57        3.8    0.8    6.9      85.5
## Leistus.terminatus               9        47        3.2    0.3    6.1      88.7
## Pterostichus.adstrictus         10        23        1.6   -3.1    6.2      90.2
## Miscodera.arctica               11        14        0.9   -1.7    3.6      91.2
## Notiophilus.reitteri            12        12        0.8   -0.2    1.8      92.0
## Harpalus.quadripunctatus        13        11        0.7   -0.7    2.2      92.7
## Notiophilus.palustris           14        11        0.7   -1.1    2.6      93.5
## Pterostichus.niger              15        11        0.7   -0.6    2.1      94.2
## Cicindela.sylvatica             16        10        0.7   -1.4    2.7      94.9
## Notiophilus.germinyi            17        10        0.7   -1.1    2.4      95.5
## Loricera.pilicornis             18         8        0.5   -0.9    2.0      96.1
## Agonum.mannerheimii             19         8        0.5   -0.4    1.4      96.6
## Pterostichus.strenuus           20         8        0.5   -0.7    1.8      97.2
## Amara.lunicollis                21         7        0.5   -1.0    1.9      97.6
## Pterostichus.diligens           22         4        0.3   -0.3    0.8      97.9
## Synuchus.vivalis                23         4        0.3   -0.5    1.1      98.2
## Calathus.melanocephalus         24         3        0.2   -0.4    0.8      98.4
## Cymindis.vaporariorum           25         3        0.2   -0.4    0.8      98.6
## Pterostichus.cupreus            26         3        0.2   -0.3    0.7      98.8
## Agonum.fuliginosum              27         2        0.1   -0.2    0.5      98.9
## Bembidion.gilvipes              28         2        0.1   -0.3    0.5      99.1
## Carabus.nitens                  29         2        0.1   -0.1    0.4      99.2
## Leistus.ferrugineus             30         2        0.1   -0.2    0.4      99.3
## Notiophilus.aestuans            31         2        0.1   -0.3    0.5      99.5
## Bradycellus.caucasicus          32         1        0.1   -0.1    0.3      99.5
## Dyschirius.globosus             33         1        0.1   -0.1    0.2      99.6
## Amara.familiaris                34         1        0.1   -0.1    0.3      99.7
## Carabus.violaceus               35         1        0.1   -0.1    0.3      99.7
## Dromius.agilis                  36         1        0.1   -0.1    0.3      99.8
## Harpalus.sp                     37         1        0.1   -0.1    0.3      99.9
## Patrobus.assimilis              38         1        0.1   -0.1    0.3      99.9
## Patrobus.atrorufus              39         1        0.1   -0.1    0.2     100.0
##                               logabun rankfreq
## Calathus.micropterus              2.7      2.6
## Pterostichus.oblongopunctatus     2.3      5.1
## Notiophilus.biguttatus            2.2      7.7
## Carabus.hortensis                 2.1     10.3
## Cychrus.caraboides                1.9     12.8
## Trechus.secalis                   1.9     15.4
## Carabus.glabratus                 1.8     17.9
## Amara.brunnea                     1.8     20.5
## Leistus.terminatus                1.7     23.1
## Pterostichus.adstrictus           1.4     25.6
## Miscodera.arctica                 1.1     28.2
## Notiophilus.reitteri              1.1     30.8
## Harpalus.quadripunctatus          1.0     33.3
## Notiophilus.palustris             1.0     35.9
## Pterostichus.niger                1.0     38.5
## Cicindela.sylvatica               1.0     41.0
## Notiophilus.germinyi              1.0     43.6
## Loricera.pilicornis               0.9     46.2
## Agonum.mannerheimii               0.9     48.7
## Pterostichus.strenuus             0.9     51.3
## Amara.lunicollis                  0.8     53.8
## Pterostichus.diligens             0.6     56.4
## Synuchus.vivalis                  0.6     59.0
## Calathus.melanocephalus           0.5     61.5
## Cymindis.vaporariorum             0.5     64.1
## Pterostichus.cupreus              0.5     66.7
## Agonum.fuliginosum                0.3     69.2
## Bembidion.gilvipes                0.3     71.8
## Carabus.nitens                    0.3     74.4
## Leistus.ferrugineus               0.3     76.9
## Notiophilus.aestuans              0.3     79.5
## Bradycellus.caucasicus            0.0     82.1
## Dyschirius.globosus               0.0     84.6
## Amara.familiaris                  0.0     87.2
## Carabus.violaceus                 0.0     89.7
## Dromius.agilis                    0.0     92.3
## Harpalus.sp                       0.0     94.9
## Patrobus.assimilis                0.0     97.4
## Patrobus.atrorufus                0.0    100.0

rankabunplot(RankAbun,scale='proportion', addit=FALSE, specnames=c(1,2,3,4))

Agrupamientos (Cluster)

library(cluster)
par(mfrwo=c(2,2))

## Warning in par(mfrwo = c(2, 2)): "mfrwo" is not a graphical parameter

ag1<-agnes(beetles2, method="single")
ag2<-agnes(beetles2, method="complete")
ag3<-agnes(beetles2, method="average")
plot(ag1)

plot(ag2)

plot(ag3)

be.dist<-vegdist(beetles2, method = "bray")
hclust <- hclust(be.dist, method = "average")
plot(hclust, ylab = "Bray-Curtis dissimilarity")

library(vegan)
data("dune")
H<-diversity(dune)
data("dune.env")
boxplot(specnumber(dune) ~ dune.env$Use, col="gray", border="blue")

pool <- specpool(dune, dune.env$Use)
pool

##          Species     chao  chao.se jack1 jack1.se    jack2     boot  boot.se n
## Hayfield      27 29.33333 2.459549  33.0 3.464102 32.64286 30.32105 2.483249 7
## Haypastu      24 31.00000 6.614378  31.0 4.077377 34.42857 27.22466 2.228030 8
## Pasture       23 29.48000 5.791414  30.2 5.600000 33.35000 26.40032 3.009648 5

reg.riq<-lm(specnumber(dune) ~ dune.env$Use)
summary(reg.riq)

## 
## Call:
## lm(formula = specnumber(dune) ~ dune.env$Use)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.500 -1.575  0.000  1.836  3.714 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      9.8619     0.5753  17.143 3.66e-12 ***
## dune.env$Use.L  -0.3435     1.0447  -0.329    0.746    
## dune.env$Use.Q   0.4432     0.9457   0.469    0.645    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.523 on 17 degrees of freedom
## Multiple R-squared:  0.021,  Adjusted R-squared:  -0.09418 
## F-statistic: 0.1823 on 2 and 17 DF,  p-value: 0.8349

reg.div<-lm(H ~ dune.env$Use)
summary(reg.div)

## 
## Call:
## lm(formula = H ~ dune.env$Use)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.66613 -0.15333  0.00045  0.24641  0.32016 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     2.16626    0.06398  33.859   <2e-16 ***
## dune.env$Use.L -0.04080    0.11618  -0.351    0.730    
## dune.env$Use.Q  0.07305    0.10517   0.695    0.497    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2806 on 17 degrees of freedom
## Multiple R-squared:  0.03765,    Adjusted R-squared:  -0.07556 
## F-statistic: 0.3326 on 2 and 17 DF,  p-value: 0.7216

library(vegan)
data(dune.env)
data(dune)
dune.env$site.totals <- apply(dune,1,sum)
Accum.1 <- accumresult(dune, y=dune.env, scale='site.totals', method='exact', conditioned=TRUE)
Accum.1

## Species Accumulation Curve
## Accumulation method: exact
## Call: specaccum(comm = x, method = method, permutations = permutations,      conditioned = conditioned, gamma = gamma) 
## 
##                                                                               
## Sites    34.2500 68.5000 102.7500 137.0000 171.2500 205.5000 239.7500 274.0000
## Richness  9.8500 15.1105  18.5105  20.9375  22.7543  24.1496  25.2396  26.1035
## sd        2.3511  1.8764   1.5723   1.4470   1.3902   1.3530   1.3165   1.2749
##                                                                        
## Sites    308.2500 342.5000 376.7500 411.0000 445.2500 479.5000 513.7500
## Richness  26.7982  27.3650  27.8340  28.2275  28.5620  28.8496  29.0996
## sd         1.2282   1.1763   1.1193   1.0565   0.9874   0.9116   0.8287
##                                                 
## Sites    548.0000 582.2500 616.5000 650.7500 685
## Richness  29.3191  29.5140  29.6895  29.8500  30
## sd         0.7381   0.6334   0.5140   0.3571   0

accumplot(Accum.1)

accumcomp(dune, y=dune.env, factor='Management', method='exact', legend=FALSE, conditioned=TRUE)

## , ,  = Sites
## 
##           obs
## Management 1 2 3  4  5  6
##         BF 1 2 3 NA NA NA
##         HF 1 2 3  4  5 NA
##         NM 1 2 3  4  5  6
##         SF 1 2 3  4  5  6
## 
## , ,  = Richness
## 
##           obs
## Management         1        2     3        4        5  6
##         BF 10.333333 14.33333 16.00       NA       NA NA
##         HF 12.600000 16.70000 19.10 20.40000 21.00000 NA
##         NM  8.000000 12.86667 16.20 18.46667 20.00000 21
##         SF  9.166667 13.60000 16.25 18.13333 19.66667 21
## 
## , ,  = sd
## 
##           obs
## Management         1         2        3        4        5  6
##         BF 1.2472191 0.4714045 0.000000       NA       NA NA
##         HF 1.0198039 1.3343708 1.732051 0.800000 0.000000 NA
##         NM 0.8164966 1.0011583 1.346070 1.642761 1.154701  0
##         SF 2.4094720 1.4282970 1.225156 1.231079 0.942809  0

***** FIN ____

Aplicaciones de la Bioestadistica con software libre R

Oscar Ramírez-Alán

21 de noviembre de 2016

Introducción

Creación de objetos

Resumenes estadístios

Distribución de frecuencias

Test de hipótesis de una Población

Intérvalo de confianza

Test de hipótesis de una cola

Para dos Poblaciones

Distribución Normal de datos

Homogeneidad de las varianzas para dos poblaciones

Test no Paramétricos

Aplicación del test de hipótesis

Test de Correlación

Correlación entre (“volume”) y (“Height”)

Exploración de datos numéricos

Comprobación de supuestos

Aplicación de la correlación no paramétrica

Aplicación práctica de correlación

Selección de datos

Comprobación de supuestos

Correlación no paramétrica

Análisis de Varianza

Medidas descriptivas utiles para los ANOVAS

Post Hoc test (Paramétrico)

Comprobación estadística de los residuos

Test no paramétrico del ANDEVA

No homogeneidad, ni Distribución Normal de los residuos

Que hacer?

Aplicación práctica del ANDEVA

Supuesto del Modelo

Homogeneidad de Varianza

Post-Hoc

Regresiones

Modelo de Regresión

Revisamos los supuestos

Aplicación gráfica de la regresión

Valores influyentes del modelo

Extraemos el valor extremos y probamos un nuevo modelo

Análisis de Biodoversidad

Agrupamientos (Cluster)