Introducción

Algunos simbolos y comandos últiles en R

Simbolo	Utilidad
<-	Direccionar y crear objetos y vectores
*	Multiplicación
/	División
+	Suma
-	Resta
sqrt	Raíz Cuadrada
~	Operador para generar función de modelos de regresión y Andevas
log	Función logaritmica
exp	Logaritmo exponencial

Creación de objetos

a<-c(5,3,4,8,7,5,6,5,5,4,1,5,3,8,9,7,4,8,4,7,4,5,4,5,8,7)

Resumenes estadístios

summary(a)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.000   5.000   5.423   7.000   9.000

#Promedio
mean(a)

## [1] 5.423077

#Desviación estándar
sd(a)

## [1] 1.942639

#Mediana
median(a)

## [1] 5

#Valor máximo
max(a)

## [1] 9

#Valor Mínimo
min(a)

## [1] 1

#Cantidad de datos de un vector
length(a)

## [1] 26

#Tabulación cruzada
table(a)

## a
## 1 3 4 5 6 7 8 9 
## 1 2 6 7 1 4 4 1

#Diagrama de cajas
boxplot(a)

#Histograma
hist(a)

#Frecuencia Relativa
prop.table(table(a))

## a
##          1          3          4          5          6          7 
## 0.03846154 0.07692308 0.23076923 0.26923077 0.03846154 0.15384615 
##          8          9 
## 0.15384615 0.03846154

Distribución de frecuencias

library(fdth)

## 
## Attaching package: 'fdth'

## The following objects are masked from 'package:stats':
## 
##     sd, var

set.seed(2016)
datos<-rnorm(100, mean=25, sd=4)
dist1<-fdt(datos)
dist1

##     Class limits  f   rf rf(%)  cf cf(%)
##  [13.696,16.248)  3 0.03     3   3     3
##    [16.248,18.8)  3 0.03     3   6     6
##    [18.8,21.353) 12 0.12    12  18    18
##  [21.353,23.905) 31 0.31    31  49    49
##  [23.905,26.458) 19 0.19    19  68    68
##   [26.458,29.01) 19 0.19    19  87    87
##   [29.01,31.562) 11 0.11    11  98    98
##  [31.562,34.115)  2 0.02     2 100   100

#Cualtiles
quantile(a)

##   0%  25%  50%  75% 100% 
##    1    4    5    7    9

#Percentiles
quantile(a, .60)

## 60% 
##   5

#?fdth
#Histograma mediante el paquete de "fdth"
plot(dist1, x.round=2,xlas=1, cex.lab=.6, cex.axis=.6, xlab="Límite de clases", ylab="Frecuencia")

#Polígono de frecuencias mediante el paquete de "fdth"
plot(dist1, x.round=2,xlas=1, cex.lab=.6, cex.axis=.6, xlab="Límite de clases", ylab="Frecuencia", type="cfp", col="red")

Test de hipótesis de una Población

t.test(a)

## 
##  One Sample t-test
## 
## data:  a
## t = 14.234, df = 25, p-value = 1.702e-13
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  4.638428 6.207726
## sample estimates:
## mean of x 
##  5.423077

Intérvalo de confianza

t.test(a, conf.level = .99)

## 
##  One Sample t-test
## 
## data:  a
## t = 14.234, df = 25, p-value = 1.702e-13
## alternative hypothesis: true mean is not equal to 0
## 99 percent confidence interval:
##  4.361112 6.485042
## sample estimates:
## mean of x 
##  5.423077

Test de hipótesis de una cola

t.test(a,mu=4, alternative = "l")

## 
##  One Sample t-test
## 
## data:  a
## t = 3.7353, df = 25, p-value = 0.9995
## alternative hypothesis: true mean is less than 4
## 95 percent confidence interval:
##      -Inf 6.073849
## sample estimates:
## mean of x 
##  5.423077

Para dos Poblaciones

placebo<-c(20,31,22,33,24,45,46,27,48,24,25,28,29,25)
droga<-  c(55,60,49,50,68,70,54,65,62,43,34,51,29,29)
length(placebo)

## [1] 14

length(droga)

## [1] 14

boxplot(placebo, droga, names = c("placebo", "droga"),col=c("whitesmoke","gray") )

Distribución Normal de datos

shapiro.test(placebo)

## 
##  Shapiro-Wilk normality test
## 
## data:  placebo
## W = 0.83741, p-value = 0.01505

shapiro.test(droga)

## 
##  Shapiro-Wilk normality test
## 
## data:  droga
## W = 0.93388, p-value = 0.3455

Homogeneidad de las varianzas para dos poblaciones

var.test(placebo,droga )

## 
##  F test to compare two variances
## 
## data:  placebo and droga
## F = 0.46366, num df = 13, denom df = 13, p-value = 0.1792
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1488468 1.4443274
## sample estimates:
## ratio of variances 
##          0.4636632

Test no Paramétricos

wilcox.test(placebo, droga)

## Warning in wilcox.test.default(placebo, droga): cannot compute exact p-
## value with ties

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  placebo and droga
## W = 17, p-value = 0.0002141
## alternative hypothesis: true location shift is not equal to 0

Aplicación del test de hipótesis

Test paramétricos de dos poblaciones

nofunamdores <- c(18,22,21,17,20,17,23,20,22,21)
fumadores <- c(16,20,14,21,20,18,13,15,17,21)

shapiro.test(nofunamdores)

## 
##  Shapiro-Wilk normality test
## 
## data:  nofunamdores
## W = 0.91199, p-value = 0.295

shapiro.test(fumadores)

## 
##  Shapiro-Wilk normality test
## 
## data:  fumadores
## W = 0.91941, p-value = 0.3521

#Test de Hipótesis
var.test(nofunamdores,fumadores )

## 
##  F test to compare two variances
## 
## data:  nofunamdores and fumadores
## F = 0.52102, num df = 9, denom df = 9, p-value = 0.3456
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.1294138 2.0976199
## sample estimates:
## ratio of variances 
##          0.5210191

t.test(nofunamdores,fumadores, var.equal = T )

## 
##  Two Sample t-test
## 
## data:  nofunamdores and fumadores
## t = 2.2573, df = 18, p-value = 0.03665
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1801366 5.0198634
## sample estimates:
## mean of x mean of y 
##      20.1      17.5

Receso

Test de Correlación

Correlación entre (“volume”) y (“Height”)

data("trees")
head(trees)

##   Girth Height Volume
## 1   8.3     70   10.3
## 2   8.6     65   10.3
## 3   8.8     63   10.2
## 4  10.5     72   16.4
## 5  10.7     81   18.8
## 6  10.8     83   19.7

Exploración de datos numéricos

plot(trees)
library(car)

scatterplotMatrix(trees)

Comprobación de supuestos

shapiro.test(trees$Height) #Normal

## 
##  Shapiro-Wilk normality test
## 
## data:  trees$Height
## W = 0.96545, p-value = 0.4034

shapiro.test(trees$Volume) #Asímetrico

## 
##  Shapiro-Wilk normality test
## 
## data:  trees$Volume
## W = 0.88757, p-value = 0.003579

Aplicación de la correlación no paramétrica

cor.test(trees$Girth,trees$Volume, method = "p", exact = F)

## 
##  Pearson's product-moment correlation
## 
## data:  trees$Girth and trees$Volume
## t = 20.478, df = 29, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.9322519 0.9841887
## sample estimates:
##       cor 
## 0.9671194

#Forma gráfica de la correlación
library(corrplot)

## Warning: package 'corrplot' was built under R version 3.3.1

corMat<-cor(trees, method = "p")
corrplot(corMat,  method="ellipse")

Aplicación práctica de correlación

library(car)
data("mtcars")
head(mtcars)

##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

Selección de datos

mtcars2<-mtcars[,1:4]
head(mtcars2)

##                    mpg cyl disp  hp
## Mazda RX4         21.0   6  160 110
## Mazda RX4 Wag     21.0   6  160 110
## Datsun 710        22.8   4  108  93
## Hornet 4 Drive    21.4   6  258 110
## Hornet Sportabout 18.7   8  360 175
## Valiant           18.1   6  225 105

#Figuras gráficas de la coreelación entre variables
scatterplotMatrix(mtcars2)

Comprobación de supuestos

shapiro.test(mtcars2$mpg)

## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars2$mpg
## W = 0.94756, p-value = 0.1229

shapiro.test(mtcars2$cyl)

## 
##  Shapiro-Wilk normality test
## 
## data:  mtcars2$cyl
## W = 0.75331, p-value = 6.058e-06

Correlación no paramétrica

corMat<-cor(mtcars2, method = "p")
corMat

##             mpg        cyl       disp         hp
## mpg   1.0000000 -0.8521620 -0.8475514 -0.7761684
## cyl  -0.8521620  1.0000000  0.9020329  0.8324475
## disp -0.8475514  0.9020329  1.0000000  0.7909486
## hp   -0.7761684  0.8324475  0.7909486  1.0000000

corrplot(corMat,  method="ellipse")

Análisis de Varianza

data(InsectSprays)
attach(InsectSprays)
#Permite ver las caracteristicas de las variables
str(InsectSprays)

## 'data.frame':    72 obs. of  2 variables:
##  $ count: num  10 7 20 14 14 12 10 23 17 20 ...
##  $ spray: Factor w/ 6 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...

model_Insect<-aov(count ~ spray,data=InsectSprays)
summary(model_Insect)

##             Df Sum Sq Mean Sq F value Pr(>F)    
## spray        5   2669   533.8    34.7 <2e-16 ***
## Residuals   66   1015    15.4                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Medidas descriptivas utiles para los ANOVAS

tapply(count, spray, mean) #Permite ver la media de los factores (Tratamientos)

##         A         B         C         D         E         F 
## 14.500000 15.333333  2.083333  4.916667  3.500000 16.666667

tapply(count, spray, sd) #Permite ver la desviacion estandar de los factores.

##        A        B        C        D        E        F 
## 4.719399 4.271115 1.975225 2.503028 1.732051 6.213378

tapply(count, spray, length) #Permite ver la cantidad de datos

##  A  B  C  D  E  F 
## 12 12 12 12 12 12

model.tables(model_Insect)#Permite medir el efecto de los tratamientos

## Tables of effects
## 
##  spray 
## spray
##      A      B      C      D      E      F 
##  5.000  5.833 -7.417 -4.583 -6.000  7.167

model.tables(model_Insect, type= "means")#Permite ver las medias de los tratamientos

## Tables of means
## Grand mean
##     
## 9.5 
## 
##  spray 
## spray
##      A      B      C      D      E      F 
## 14.500 15.333  2.083  4.917  3.500 16.667

boxplot(count ~ spray, data=InsectSprays, col="16", xlab="Tipos de Spray", ylab="Cantidad de Insectos")

#Editando la Figura
library(ggplot2)
ggplot(InsectSprays,aes(spray, count, colour = spray ))+geom_point() + geom_boxplot()

model_Insect<-aov(count~spray, data=InsectSprays)
summary(model_Insect)

##             Df Sum Sq Mean Sq F value Pr(>F)    
## spray        5   2669   533.8    34.7 <2e-16 ***
## Residuals   66   1015    15.4                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Comparaciones planeadas: Cuando tenemos sospecha de un posible resultado del análisis, es decir que entre grupos esperamos encontrar diferencias.

Ejm: cuando en un experimento hay un grupo control, a otro se le aplica un medicamento y otro grupo se le suministra mayor cantidad de un medicamento experimental. Posiblemente sospechemos que vamos a tener resultados diferentes (esto es lo que se entiende en el diseño de muestreo como un Experimento Planeado. Esto por tal debe se tratarse con la prueba de Fisher, en caso de los residuos cumplir con los supuyestos paramétricos.

Contrastes no planeados (post-hoc) Aquí hay múltiples técnicas, y lo que intenta es reducir posibilidad de errores tipo I (no acepta la hipótesis nula siendo esta verdadera), a costa de aumentar el error tipo II. Dicho de otro modo, en situaciones donde haya realmente diferencias entre grupos, las pruebas post-hoc no lo detectan. Por lo que se reconocen como los métodos más seguros, dado que detectan las verdaderas diferencias significativas.

Post Hoc test (Paramétrico)

#Test HSD (Honestly-significant-difference)
TukeyHSD(model_Insect)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = count ~ spray, data = InsectSprays)
## 
## $spray
##            diff        lwr       upr     p adj
## B-A   0.8333333  -3.866075  5.532742 0.9951810
## C-A -12.4166667 -17.116075 -7.717258 0.0000000
## D-A  -9.5833333 -14.282742 -4.883925 0.0000014
## E-A -11.0000000 -15.699409 -6.300591 0.0000000
## F-A   2.1666667  -2.532742  6.866075 0.7542147
## C-B -13.2500000 -17.949409 -8.550591 0.0000000
## D-B -10.4166667 -15.116075 -5.717258 0.0000002
## E-B -11.8333333 -16.532742 -7.133925 0.0000000
## F-B   1.3333333  -3.366075  6.032742 0.9603075
## D-C   2.8333333  -1.866075  7.532742 0.4920707
## E-C   1.4166667  -3.282742  6.116075 0.9488669
## F-C  14.5833333   9.883925 19.282742 0.0000000
## E-D  -1.4166667  -6.116075  3.282742 0.9488669
## F-D  11.7500000   7.050591 16.449409 0.0000000
## F-E  13.1666667   8.467258 17.866075 0.0000000

#Forma Gráfica del HSD
plot(TukeyHSD(model_Insect),  cex.axis=.7)

Comprobación de los supuestos del andeva

par(mfrow=c(2,2)) #Particiona mi ventana grafica
plot(model_Insect)

Comprobación estadística de los residuos

#Normalidad de los residuos
shapiro.test(residuals(model_Insect))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_Insect)
## W = 0.96006, p-value = 0.02226

#Homogeneidad de varianzas
library(car)
levene.test(model_Insect)

## Warning: 'levene.test' is deprecated.
## Use 'leveneTest' instead.
## See help("Deprecated") and help("car-deprecated").

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value   Pr(>F)   
## group  5  3.8214 0.004223 **
##       66                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Dado que no cumplen con los supuestos paramétricos, la opción es aplicar es un test no paramétrico.

Test no paramétrico del ANDEVA

kruskal.test(count~spray, data=InsectSprays)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  count by spray
## Kruskal-Wallis chi-squared = 54.691, df = 5, p-value = 1.511e-10

No homogeneidad, ni Distribución Normal de los residuos

Que hacer?

#Comparaciones no planeadas "Dunn's-test" y "bonferroni"
library(PMCMR)
posthoc.kruskal.dunn.test(x=count, g=spray, p.adjust.method="none")

## Warning in posthoc.kruskal.dunn.test.default(x = count, g = spray,
## p.adjust.method = "none"): Ties are present. z-quantiles were corrected for
## ties.

## 
##  Pairwise comparisons using Dunn's-test for multiple 
##                          comparisons of independent samples 
## 
## data:  count and spray 
## 
##   A       B       C       D       E      
## B 0.75448 -       -       -       -      
## C 1.8e-06 3.6e-07 -       -       -      
## D 0.00182 0.00060 0.09762 -       -      
## E 0.00012 3.1e-05 0.35572 0.46358 -      
## F 0.68505 0.92603 2.2e-07 0.00043 2.1e-05
## 
## P value adjustment method: none

posthoc.kruskal.dunn.test(x=count, g=spray, p.adjust.method="bonferroni")

## Warning in posthoc.kruskal.dunn.test.default(x = count, g = spray,
## p.adjust.method = "bonferroni"): Ties are present. z-quantiles were
## corrected for ties.

## 
##  Pairwise comparisons using Dunn's-test for multiple 
##                          comparisons of independent samples 
## 
## data:  count and spray 
## 
##   A       B       C       D       E      
## B 1.00000 -       -       -       -      
## C 2.7e-05 5.5e-06 -       -       -      
## D 0.02735 0.00904 1.00000 -       -      
## E 0.00177 0.00047 1.00000 1.00000 -      
## F 1.00000 1.00000 3.3e-06 0.00640 0.00031
## 
## P value adjustment method: bonferroni

Aplicación práctica del ANDEVA

library(car)
data(Baumann)
head(Baumann)

##   group pretest.1 pretest.2 post.test.1 post.test.2 post.test.3
## 1 Basal         4         3           5           4          41
## 2 Basal         6         5           9           5          41
## 3 Basal         9         4           5           3          43
## 4 Basal        12         6           8           5          46
## 5 Basal        16         5          10           9          46
## 6 Basal        15        13           9           8          45

str(Baumann)

## 'data.frame':    66 obs. of  6 variables:
##  $ group      : Factor w/ 3 levels "Basal","DRTA",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ pretest.1  : int  4 6 9 12 16 15 14 12 12 8 ...
##  $ pretest.2  : int  3 5 4 6 5 13 8 7 3 8 ...
##  $ post.test.1: int  5 9 5 8 10 9 12 5 8 7 ...
##  $ post.test.2: int  4 5 3 5 9 8 5 5 7 7 ...
##  $ post.test.3: int  41 41 43 46 46 45 45 32 33 39 ...

attach(Baumann)
is.factor(group)

## [1] TRUE

is.numeric(pretest.1)

## [1] TRUE

attach(Baumann)

## The following objects are masked from Baumann (pos = 3):
## 
##     group, post.test.1, post.test.2, post.test.3, pretest.1,
##     pretest.2

model1<-aov(post.test.1~group,data=Baumann)
summary(model1)

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## group        2  108.1   54.06   5.317 0.00735 **
## Residuals   63  640.5   10.17                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Supuesto del Modelo

par(mfrow=c(2,2))
plot(model1)

#Normalidad de los residuos
shapiro.test(residuals(model1))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model1)
## W = 0.97734, p-value = 0.2682

Homogeneidad de Varianza

leveneTest(post.test.1~group,data=Baumann)

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  2.1297 0.1273
##       63

bartlett.test(post.test.1~group,data=Baumann)

## 
##  Bartlett test of homogeneity of variances
## 
## data:  post.test.1 by group
## Bartlett's K-squared = 3.7349, df = 2, p-value = 0.1545

Post-Hoc

TukeyHSD(model1)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = post.test.1 ~ group, data = Baumann)
## 
## $group
##                  diff        lwr       upr     p adj
## DRTA-Basal   3.090909  0.7832953 5.3985228 0.0057599
## Strat-Basal  1.090909 -1.2167047 3.3985228 0.4965098
## Strat-DRTA  -2.000000 -4.3076137 0.3076137 0.1021195

plot(TukeyHSD(model1))

boxplot(post.test.1~group,data=Baumann)

library(ggplot2)
ggplot(Baumann, aes(x=factor(group), y=post.test.1, fill=group)) + geom_boxplot()

Regresiones

data("trees")
head(trees)

##   Girth Height Volume
## 1   8.3     70   10.3
## 2   8.6     65   10.3
## 3   8.8     63   10.2
## 4  10.5     72   16.4
## 5  10.7     81   18.8
## 6  10.8     83   19.7

pairs(trees,pch=16, panel = panel.smooth)

Modelo de Regresión

reg1 <- lm(Volume ~ Height + Girth, data=trees)
summary(reg1)

## 
## Call:
## lm(formula = Volume ~ Height + Girth, data = trees)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.4065 -2.6493 -0.2876  2.2003  8.4847 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -57.9877     8.6382  -6.713 2.75e-07 ***
## Height        0.3393     0.1302   2.607   0.0145 *  
## Girth         4.7082     0.2643  17.816  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.882 on 28 degrees of freedom
## Multiple R-squared:  0.948,  Adjusted R-squared:  0.9442 
## F-statistic:   255 on 2 and 28 DF,  p-value: < 2.2e-16

Revisamos los supuestos

par(mfrow=c(2,2))
plot(reg1)

#Parece haber ciertos problemas en el cumplimiento de los supuestos!

reg2 <- lm(log(Volume) ~ log(Height) + log(Girth), data=trees)
summary(reg2)

## 
## Call:
## lm(formula = log(Volume) ~ log(Height) + log(Girth), data = trees)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.168561 -0.048488  0.002431  0.063637  0.129223 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.63162    0.79979  -8.292 5.06e-09 ***
## log(Height)  1.11712    0.20444   5.464 7.81e-06 ***
## log(Girth)   1.98265    0.07501  26.432  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08139 on 28 degrees of freedom
## Multiple R-squared:  0.9777, Adjusted R-squared:  0.9761 
## F-statistic: 613.2 on 2 and 28 DF,  p-value: < 2.2e-16

par(mfrow=c(2,2))
plot(reg2)

Aplicación gráfica de la regresión

library(visreg)

## Warning: package 'visreg' was built under R version 3.3.1

par(mfrow=c(2,2))
visreg(reg2)

library(ggplot2)#ggplot2 es una extension poderosa para graficar
ggplot(trees, aes(x=log(Height), y=log(Volume))) +
geom_point(shape=1) +    # genera circulos en el grafico
geom_smooth(method=lm)

ggplot(trees, aes(x=log(Girth), y=log(Volume))) +
geom_point() +
geom_smooth(span = 0.3)

## `geom_smooth()` using method = 'loess'

Valores influyentes del modelo

library(car)
influencePlot(reg1, id.n = 3)

##      StudRes       Hat     CookD
## 2   1.651668 0.1472096 0.1478463
## 3   1.567740 0.1768619 0.1673192
## 18 -1.859901 0.1434615 0.1775359
## 20 -1.105744 0.2112366 0.1082855
## 31  2.765603 0.2270585 0.6052326

Identificamos valores extremos en el modelo

outlierTest(reg1)

## 
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
##    rstudent unadjusted p-value Bonferonni p
## 31 2.765603           0.010122      0.31377

outlierTest(reg2)

## 
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
##    rstudent unadjusted p-value Bonferonni p
## 18 -2.32572           0.027789      0.86146

Extraemos el valor extremos y probamos un nuevo modelo

trees1<-trees[-c(2,3,18,20,31),]
trees1

##    Girth Height Volume
## 1    8.3     70   10.3
## 4   10.5     72   16.4
## 5   10.7     81   18.8
## 6   10.8     83   19.7
## 7   11.0     66   15.6
## 8   11.0     75   18.2
## 9   11.1     80   22.6
## 10  11.2     75   19.9
## 11  11.3     79   24.2
## 12  11.4     76   21.0
## 13  11.4     76   21.4
## 14  11.7     69   21.3
## 15  12.0     75   19.1
## 16  12.9     74   22.2
## 17  12.9     85   33.8
## 19  13.7     71   25.7
## 21  14.0     78   34.5
## 22  14.2     80   31.7
## 23  14.5     74   36.3
## 24  16.0     72   38.3
## 25  16.3     77   42.6
## 26  17.3     81   55.4
## 27  17.5     82   55.7
## 28  17.9     80   58.3
## 29  18.0     80   51.5
## 30  18.0     80   51.0

reg3 <- lm(log(Volume) ~ log(Height) + log(Girth), data=trees1)
summary(reg3)

## 
## Call:
## lm(formula = log(Volume) ~ log(Height) + log(Girth), data = trees1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.168557 -0.064486  0.008656  0.062461  0.119170 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.92713    1.14349  -6.932 4.57e-07 ***
## log(Height)  1.42237    0.27875   5.103 3.62e-05 ***
## log(Girth)   1.97364    0.08288  23.815  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08015 on 23 degrees of freedom
## Multiple R-squared:  0.9723, Adjusted R-squared:  0.9699 
## F-statistic: 404.2 on 2 and 23 DF,  p-value: < 2.2e-16

par(mfrow=c(2,2))
plot(reg3)

#Criterio de selección del modelo (Akaike information criterion)

AIC(reg2,reg3)

## Warning in AIC.default(reg2, reg3): models are not all fitted to the same
## number of observations

##      df       AIC
## reg2  4 -62.71125
## reg3  4 -52.64131

Análisis de Biodoversidad

library(BiodiversityR)

## Loading required package: tcltk

## Loading required package: vegan

## Warning: package 'vegan' was built under R version 3.3.1

## Loading required package: permute

## Warning: package 'permute' was built under R version 3.3.1

## Loading required package: lattice

## This is vegan 2.4-1

## BiodiversityR 2.7-2: use function 'BiodiversityRGUI()' to launch the BiodiversityR Graphical User Interface

BiodiversityRGUI()

## Loading required namespace: dismo

## Warning: package 'RcmdrMisc' was built under R version 3.3.1

## Warning: package 'sandwich' was built under R version 3.3.1

## La interfaz R-Commander sólo funciona en sesiones interactivas

## 
## Attaching package: 'Rcmdr'

## The following objects are masked from 'package:tcltk':
## 
##     tclvalue, tkfocus

## Warning in if (class(eval1) == "RasterStack") {: la condición tiene longitud > 1
## y sólo el primer elemento será usado

## Warning in if (class(eval1) == "RasterStack") {: la condición tiene longitud > 1
## y sólo el primer elemento será usado

## Warning in if (class(eval1) == "RasterStack") {: la condición tiene longitud > 1
## y sólo el primer elemento será usado

## Fuente: BiodiversityGUI.R

beetles <- read.delim ('http://www.davidzeleny.net/anadat-r/data-download/carabid-beetles-boreal-forest.txt', row.names = 1)
head(beetles)

##                               moist.primeval.forests dry.primeval.forests
## Calathus.micropterus                             203                   91
## Pterostichus.oblongopunctatus                    101                   18
## Notiophilus.biguttatus                            73                   29
## Carabus.hortensis                                 68                    6
## Carabus.glabratus                                 22                    9
## Cychrus.caraboides                                41                    8
##                               managed.forests old.plantations young.plantations
## Calathus.micropterus                      161              29                48
## Pterostichus.oblongopunctatus              54               9                23
## Notiophilus.biguttatus                     37               1                 3
## Carabus.hortensis                          43               0                 2
## Carabus.glabratus                          22               1                15
## Cychrus.caraboides                         16               1                 6

#Permite transponer datos
t(beetles)->beetles2

#Permite obtener número de espececies por sitio (Riqueza)
specnumber(beetles2)

## moist.primeval.forests   dry.primeval.forests        managed.forests 
##                     16                     13                     20 
##        old.plantations      young.plantations 
##                      9                     31

#Permite obtener la suma de las abundancias por sitio
apply(beetles2,1,sum)

## moist.primeval.forests   dry.primeval.forests        managed.forests 
##                    597                    199                    381 
##        old.plantations      young.plantations 
##                     63                    243

#base ln (logaritmo neperiano)
H<-diversity(beetles2, index="shannon")  
H

## moist.primeval.forests   dry.primeval.forests        managed.forests 
##               2.000573               1.834545               1.958269 
##        old.plantations      young.plantations 
##               1.524423               2.817914

#Indice de  simpson (Dominancia)
simp <- diversity(beetles2, "simpson")
 #reciproco de Simpson (Dominancia)
invsimp <- diversity(beetles2, "inv")   
invsimp #Aumenta cuando la comunidad es menos equitativa

## moist.primeval.forests   dry.primeval.forests        managed.forests 
##               5.433975               3.946288               4.386987 
##        old.plantations      young.plantations 
##               3.389411              11.370884

#Pielou´s
J <- H/log(specnumber(beetles2))  
J# Pielou´s (valores>son equitativo, uniformes en las abundancias, valores< mayor dominancia, alta heterogeneidad).

## moist.primeval.forests   dry.primeval.forests        managed.forests 
##              0.7215542              0.7152364              0.6536864 
##        old.plantations      young.plantations 
##              0.6937949              0.8205954

#Permite estimar el número de especies no observadas.
specpool(beetles2)

##     Species chao  chao.se jack1 jack1.se jack2     boot  boot.se n
## All      39 55.2 10.35978  53.4 11.32784  60.6 45.56192 6.477809 5

#Curva de Acumulación
rarecurve (beetles2, col="blue", cex = 0.6)

RankAbun <- rankabundance(beetles2)
RankAbun

##                               rank abundance proportion plower pupper accumfreq
## Calathus.micropterus             1       532       35.9   25.0   46.7      35.9
## Pterostichus.oblongopunctatus    2       205       13.8    8.9   18.7      49.7
## Notiophilus.biguttatus           3       143        9.6    3.8   15.5      59.3
## Carabus.hortensis                4       119        8.0    1.4   14.6      67.4
## Cychrus.caraboides               5        72        4.9    2.0    7.7      72.2
## Trechus.secalis                  6        71        4.8   -1.8   11.4      77.0
## Carabus.glabratus                7        69        4.7    2.9    6.4      81.7
## Amara.brunnea                    8        57        3.8    0.8    6.9      85.5
## Leistus.terminatus               9        47        3.2    0.3    6.1      88.7
## Pterostichus.adstrictus         10        23        1.6   -3.1    6.2      90.2
## Miscodera.arctica               11        14        0.9   -1.7    3.6      91.2
## Notiophilus.reitteri            12        12        0.8   -0.2    1.8      92.0
## Harpalus.quadripunctatus        13        11        0.7   -0.7    2.2      92.7
## Notiophilus.palustris           14        11        0.7   -1.1    2.6      93.5
## Pterostichus.niger              15        11        0.7   -0.6    2.1      94.2
## Cicindela.sylvatica             16        10        0.7   -1.4    2.7      94.9
## Notiophilus.germinyi            17        10        0.7   -1.1    2.4      95.5
## Loricera.pilicornis             18         8        0.5   -0.9    2.0      96.1
## Agonum.mannerheimii             19         8        0.5   -0.4    1.4      96.6
## Pterostichus.strenuus           20         8        0.5   -0.7    1.8      97.2
## Amara.lunicollis                21         7        0.5   -1.0    1.9      97.6
## Pterostichus.diligens           22         4        0.3   -0.3    0.8      97.9
## Synuchus.vivalis                23         4        0.3   -0.5    1.1      98.2
## Calathus.melanocephalus         24         3        0.2   -0.4    0.8      98.4
## Cymindis.vaporariorum           25         3        0.2   -0.4    0.8      98.6
## Pterostichus.cupreus            26         3        0.2   -0.3    0.7      98.8
## Agonum.fuliginosum              27         2        0.1   -0.2    0.5      98.9
## Bembidion.gilvipes              28         2        0.1   -0.3    0.5      99.1
## Carabus.nitens                  29         2        0.1   -0.1    0.4      99.2
## Leistus.ferrugineus             30         2        0.1   -0.2    0.4      99.3
## Notiophilus.aestuans            31         2        0.1   -0.3    0.5      99.5
## Bradycellus.caucasicus          32         1        0.1   -0.1    0.3      99.5
## Dyschirius.globosus             33         1        0.1   -0.1    0.2      99.6
## Amara.familiaris                34         1        0.1   -0.1    0.3      99.7
## Carabus.violaceus               35         1        0.1   -0.1    0.3      99.7
## Dromius.agilis                  36         1        0.1   -0.1    0.3      99.8
## Harpalus.sp                     37         1        0.1   -0.1    0.3      99.9
## Patrobus.assimilis              38         1        0.1   -0.1    0.3      99.9
## Patrobus.atrorufus              39         1        0.1   -0.1    0.2     100.0
##                               logabun rankfreq
## Calathus.micropterus              2.7      2.6
## Pterostichus.oblongopunctatus     2.3      5.1
## Notiophilus.biguttatus            2.2      7.7
## Carabus.hortensis                 2.1     10.3
## Cychrus.caraboides                1.9     12.8
## Trechus.secalis                   1.9     15.4
## Carabus.glabratus                 1.8     17.9
## Amara.brunnea                     1.8     20.5
## Leistus.terminatus                1.7     23.1
## Pterostichus.adstrictus           1.4     25.6
## Miscodera.arctica                 1.1     28.2
## Notiophilus.reitteri              1.1     30.8
## Harpalus.quadripunctatus          1.0     33.3
## Notiophilus.palustris             1.0     35.9
## Pterostichus.niger                1.0     38.5
## Cicindela.sylvatica               1.0     41.0
## Notiophilus.germinyi              1.0     43.6
## Loricera.pilicornis               0.9     46.2
## Agonum.mannerheimii               0.9     48.7
## Pterostichus.strenuus             0.9     51.3
## Amara.lunicollis                  0.8     53.8
## Pterostichus.diligens             0.6     56.4
## Synuchus.vivalis                  0.6     59.0
## Calathus.melanocephalus           0.5     61.5
## Cymindis.vaporariorum             0.5     64.1
## Pterostichus.cupreus              0.5     66.7
## Agonum.fuliginosum                0.3     69.2
## Bembidion.gilvipes                0.3     71.8
## Carabus.nitens                    0.3     74.4
## Leistus.ferrugineus               0.3     76.9
## Notiophilus.aestuans              0.3     79.5
## Bradycellus.caucasicus            0.0     82.1
## Dyschirius.globosus               0.0     84.6
## Amara.familiaris                  0.0     87.2
## Carabus.violaceus                 0.0     89.7
## Dromius.agilis                    0.0     92.3
## Harpalus.sp                       0.0     94.9
## Patrobus.assimilis                0.0     97.4
## Patrobus.atrorufus                0.0    100.0

rankabunplot(RankAbun,scale='proportion', addit=FALSE, specnames=c(1,2,3,4))

Clase Taller I

Oscar Ramírez-Alán

9 de noviembre de 2016

Introducción

Creación de objetos

Resumenes estadístios

Distribución de frecuencias

Test de hipótesis de una Población

Intérvalo de confianza

Test de hipótesis de una cola

Para dos Poblaciones

Distribución Normal de datos

Homogeneidad de las varianzas para dos poblaciones

Test no Paramétricos

Aplicación del test de hipótesis

Test de Correlación

Correlación entre (“volume”) y (“Height”)

Exploración de datos numéricos

Comprobación de supuestos

Aplicación de la correlación no paramétrica

Aplicación práctica de correlación

Selección de datos

Comprobación de supuestos

Correlación no paramétrica

Análisis de Varianza

Medidas descriptivas utiles para los ANOVAS

Post Hoc test (Paramétrico)

Comprobación estadística de los residuos

Test no paramétrico del ANDEVA

No homogeneidad, ni Distribución Normal de los residuos

Que hacer?

Aplicación práctica del ANDEVA

Supuesto del Modelo

Homogeneidad de Varianza

Post-Hoc

Regresiones

Modelo de Regresión

Revisamos los supuestos

Aplicación gráfica de la regresión

Valores influyentes del modelo

Extraemos el valor extremos y probamos un nuevo modelo

Análisis de Biodoversidad