Objetivo General
. Aprender al manejo y análisis de datos con el Software libre R
Objetivos específicos
. Familiarizarse con el software R . Aprender a cómo manejar datos en R . Afrontar los tipos de análisis puedo aplicar a mis datos.
Algunos simbolos y comandos últiles en R
| Simbolo | Utilidad |
|---|---|
| <- | Direccionar y crear objetos y vectores |
| * | Multiplicación |
| / | División |
| + | Suma |
| - | Resta |
| sqrt | Raíz Cuadrada |
| ~ | Operador para generar función de modelos de regresión y Andevas |
| log | Función logaritmica |
| exp | Logaritmo exponencial |
a<-c(5,3,4,8,7,5,6,5,5,4,1,5,3,8,9,7,4,8,4,7,4,5,4,5,8,7)
summary(a)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 4.000 5.000 5.423 7.000 9.000
#Promedio
mean(a)
## [1] 5.423077
#Desviación estándar
sd(a)
## [1] 1.942639
#Mediana
median(a)
## [1] 5
#Valor máximo
max(a)
## [1] 9
#Valor Mínimo
min(a)
## [1] 1
#Cantidad de datos de un vector
length(a)
## [1] 26
#Tabulación cruzada
table(a)
## a
## 1 3 4 5 6 7 8 9
## 1 2 6 7 1 4 4 1
#Diagrama de cajas
boxplot(a)
#Histograma
hist(a)
#Frecuencia Relativa
prop.table(table(a))
## a
## 1 3 4 5 6 7
## 0.03846154 0.07692308 0.23076923 0.26923077 0.03846154 0.15384615
## 8 9
## 0.15384615 0.03846154
library(fdth)
##
## Attaching package: 'fdth'
## The following objects are masked from 'package:stats':
##
## sd, var
set.seed(2016)
datos<-rnorm(100, mean=25, sd=4)
dist1<-fdt(datos)
dist1
## Class limits f rf rf(%) cf cf(%)
## [13.696,16.248) 3 0.03 3 3 3
## [16.248,18.8) 3 0.03 3 6 6
## [18.8,21.353) 12 0.12 12 18 18
## [21.353,23.905) 31 0.31 31 49 49
## [23.905,26.458) 19 0.19 19 68 68
## [26.458,29.01) 19 0.19 19 87 87
## [29.01,31.562) 11 0.11 11 98 98
## [31.562,34.115) 2 0.02 2 100 100
#Cualtiles
quantile(a)
## 0% 25% 50% 75% 100%
## 1 4 5 7 9
#Percentiles
quantile(a, .60)
## 60%
## 5
#?fdth
#Histograma mediante el paquete de "fdth"
plot(dist1, x.round=2,xlas=1, cex.lab=.6, cex.axis=.6, xlab="Límite de clases", ylab="Frecuencia")
#Polígono de frecuencias mediante el paquete de "fdth"
plot(dist1, x.round=2,xlas=1, cex.lab=.6, cex.axis=.6, xlab="Límite de clases", ylab="Frecuencia", type="cfp", col="red")
t.test(a)
##
## One Sample t-test
##
## data: a
## t = 14.234, df = 25, p-value = 1.702e-13
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 4.638428 6.207726
## sample estimates:
## mean of x
## 5.423077
t.test(a, conf.level = .99)
##
## One Sample t-test
##
## data: a
## t = 14.234, df = 25, p-value = 1.702e-13
## alternative hypothesis: true mean is not equal to 0
## 99 percent confidence interval:
## 4.361112 6.485042
## sample estimates:
## mean of x
## 5.423077
t.test(a,mu=4, alternative = "l")
##
## One Sample t-test
##
## data: a
## t = 3.7353, df = 25, p-value = 0.9995
## alternative hypothesis: true mean is less than 4
## 95 percent confidence interval:
## -Inf 6.073849
## sample estimates:
## mean of x
## 5.423077
placebo<-c(20,31,22,33,24,45,46,27,48,24,25,28,29,25)
droga<- c(55,60,49,50,68,70,54,65,62,43,34,51,29,29)
length(placebo)
## [1] 14
length(droga)
## [1] 14
boxplot(placebo, droga, names = c("placebo", "droga"),col=c("whitesmoke","gray") )
shapiro.test(placebo)
##
## Shapiro-Wilk normality test
##
## data: placebo
## W = 0.83741, p-value = 0.01505
shapiro.test(droga)
##
## Shapiro-Wilk normality test
##
## data: droga
## W = 0.93388, p-value = 0.3455
var.test(placebo,droga )
##
## F test to compare two variances
##
## data: placebo and droga
## F = 0.46366, num df = 13, denom df = 13, p-value = 0.1792
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1488468 1.4443274
## sample estimates:
## ratio of variances
## 0.4636632
wilcox.test(placebo, droga)
## Warning in wilcox.test.default(placebo, droga): cannot compute exact p-
## value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: placebo and droga
## W = 17, p-value = 0.0002141
## alternative hypothesis: true location shift is not equal to 0
Test paramétricos de dos poblaciones
nofunamdores <- c(18,22,21,17,20,17,23,20,22,21)
fumadores <- c(16,20,14,21,20,18,13,15,17,21)
shapiro.test(nofunamdores)
##
## Shapiro-Wilk normality test
##
## data: nofunamdores
## W = 0.91199, p-value = 0.295
shapiro.test(fumadores)
##
## Shapiro-Wilk normality test
##
## data: fumadores
## W = 0.91941, p-value = 0.3521
#Test de Hipótesis
var.test(nofunamdores,fumadores )
##
## F test to compare two variances
##
## data: nofunamdores and fumadores
## F = 0.52102, num df = 9, denom df = 9, p-value = 0.3456
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1294138 2.0976199
## sample estimates:
## ratio of variances
## 0.5210191
t.test(nofunamdores,fumadores, var.equal = T )
##
## Two Sample t-test
##
## data: nofunamdores and fumadores
## t = 2.2573, df = 18, p-value = 0.03665
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.1801366 5.0198634
## sample estimates:
## mean of x mean of y
## 20.1 17.5
Receso
data("trees")
head(trees)
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
plot(trees)
library(car)
scatterplotMatrix(trees)
shapiro.test(trees$Height) #Normal
##
## Shapiro-Wilk normality test
##
## data: trees$Height
## W = 0.96545, p-value = 0.4034
shapiro.test(trees$Volume) #Asímetrico
##
## Shapiro-Wilk normality test
##
## data: trees$Volume
## W = 0.88757, p-value = 0.003579
cor.test(trees$Girth,trees$Volume, method = "p", exact = F)
##
## Pearson's product-moment correlation
##
## data: trees$Girth and trees$Volume
## t = 20.478, df = 29, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.9322519 0.9841887
## sample estimates:
## cor
## 0.9671194
#Forma gráfica de la correlación
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.3.1
corMat<-cor(trees, method = "p")
corrplot(corMat, method="ellipse")
library(car)
data("mtcars")
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
mtcars2<-mtcars[,1:4]
head(mtcars2)
## mpg cyl disp hp
## Mazda RX4 21.0 6 160 110
## Mazda RX4 Wag 21.0 6 160 110
## Datsun 710 22.8 4 108 93
## Hornet 4 Drive 21.4 6 258 110
## Hornet Sportabout 18.7 8 360 175
## Valiant 18.1 6 225 105
#Figuras gráficas de la coreelación entre variables
scatterplotMatrix(mtcars2)
shapiro.test(mtcars2$mpg)
##
## Shapiro-Wilk normality test
##
## data: mtcars2$mpg
## W = 0.94756, p-value = 0.1229
shapiro.test(mtcars2$cyl)
##
## Shapiro-Wilk normality test
##
## data: mtcars2$cyl
## W = 0.75331, p-value = 6.058e-06
corMat<-cor(mtcars2, method = "p")
corMat
## mpg cyl disp hp
## mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684
## cyl -0.8521620 1.0000000 0.9020329 0.8324475
## disp -0.8475514 0.9020329 1.0000000 0.7909486
## hp -0.7761684 0.8324475 0.7909486 1.0000000
corrplot(corMat, method="ellipse")
data(InsectSprays)
attach(InsectSprays)
#Permite ver las caracteristicas de las variables
str(InsectSprays)
## 'data.frame': 72 obs. of 2 variables:
## $ count: num 10 7 20 14 14 12 10 23 17 20 ...
## $ spray: Factor w/ 6 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...
model_Insect<-aov(count ~ spray,data=InsectSprays)
summary(model_Insect)
## Df Sum Sq Mean Sq F value Pr(>F)
## spray 5 2669 533.8 34.7 <2e-16 ***
## Residuals 66 1015 15.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tapply(count, spray, mean) #Permite ver la media de los factores (Tratamientos)
## A B C D E F
## 14.500000 15.333333 2.083333 4.916667 3.500000 16.666667
tapply(count, spray, sd) #Permite ver la desviacion estandar de los factores.
## A B C D E F
## 4.719399 4.271115 1.975225 2.503028 1.732051 6.213378
tapply(count, spray, length) #Permite ver la cantidad de datos
## A B C D E F
## 12 12 12 12 12 12
model.tables(model_Insect)#Permite medir el efecto de los tratamientos
## Tables of effects
##
## spray
## spray
## A B C D E F
## 5.000 5.833 -7.417 -4.583 -6.000 7.167
model.tables(model_Insect, type= "means")#Permite ver las medias de los tratamientos
## Tables of means
## Grand mean
##
## 9.5
##
## spray
## spray
## A B C D E F
## 14.500 15.333 2.083 4.917 3.500 16.667
boxplot(count ~ spray, data=InsectSprays, col="16", xlab="Tipos de Spray", ylab="Cantidad de Insectos")
#Editando la Figura
library(ggplot2)
ggplot(InsectSprays,aes(spray, count, colour = spray ))+geom_point() + geom_boxplot()
model_Insect<-aov(count~spray, data=InsectSprays)
summary(model_Insect)
## Df Sum Sq Mean Sq F value Pr(>F)
## spray 5 2669 533.8 34.7 <2e-16 ***
## Residuals 66 1015 15.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Comparaciones planeadas: Cuando tenemos sospecha de un posible resultado del análisis, es decir que entre grupos esperamos encontrar diferencias.
Ejm: cuando en un experimento hay un grupo control, a otro se le aplica un medicamento y otro grupo se le suministra mayor cantidad de un medicamento experimental. Posiblemente sospechemos que vamos a tener resultados diferentes (esto es lo que se entiende en el diseño de muestreo como un Experimento Planeado. Esto por tal debe se tratarse con la prueba de Fisher, en caso de los residuos cumplir con los supuyestos paramétricos.
Contrastes no planeados (post-hoc) Aquí hay múltiples técnicas, y lo que intenta es reducir posibilidad de errores tipo I (no acepta la hipótesis nula siendo esta verdadera), a costa de aumentar el error tipo II. Dicho de otro modo, en situaciones donde haya realmente diferencias entre grupos, las pruebas post-hoc no lo detectan. Por lo que se reconocen como los métodos más seguros, dado que detectan las verdaderas diferencias significativas.
#Test HSD (Honestly-significant-difference)
TukeyHSD(model_Insect)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = count ~ spray, data = InsectSprays)
##
## $spray
## diff lwr upr p adj
## B-A 0.8333333 -3.866075 5.532742 0.9951810
## C-A -12.4166667 -17.116075 -7.717258 0.0000000
## D-A -9.5833333 -14.282742 -4.883925 0.0000014
## E-A -11.0000000 -15.699409 -6.300591 0.0000000
## F-A 2.1666667 -2.532742 6.866075 0.7542147
## C-B -13.2500000 -17.949409 -8.550591 0.0000000
## D-B -10.4166667 -15.116075 -5.717258 0.0000002
## E-B -11.8333333 -16.532742 -7.133925 0.0000000
## F-B 1.3333333 -3.366075 6.032742 0.9603075
## D-C 2.8333333 -1.866075 7.532742 0.4920707
## E-C 1.4166667 -3.282742 6.116075 0.9488669
## F-C 14.5833333 9.883925 19.282742 0.0000000
## E-D -1.4166667 -6.116075 3.282742 0.9488669
## F-D 11.7500000 7.050591 16.449409 0.0000000
## F-E 13.1666667 8.467258 17.866075 0.0000000
#Forma Gráfica del HSD
plot(TukeyHSD(model_Insect), cex.axis=.7)
Comprobación de los supuestos del andeva
par(mfrow=c(2,2)) #Particiona mi ventana grafica
plot(model_Insect)
#Normalidad de los residuos
shapiro.test(residuals(model_Insect))
##
## Shapiro-Wilk normality test
##
## data: residuals(model_Insect)
## W = 0.96006, p-value = 0.02226
#Homogeneidad de varianzas
library(car)
levene.test(model_Insect)
## Warning: 'levene.test' is deprecated.
## Use 'leveneTest' instead.
## See help("Deprecated") and help("car-deprecated").
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 3.8214 0.004223 **
## 66
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Dado que no cumplen con los supuestos paramétricos, la opción es aplicar es un test no paramétrico.
kruskal.test(count~spray, data=InsectSprays)
##
## Kruskal-Wallis rank sum test
##
## data: count by spray
## Kruskal-Wallis chi-squared = 54.691, df = 5, p-value = 1.511e-10
#Comparaciones no planeadas "Dunn's-test" y "bonferroni"
library(PMCMR)
posthoc.kruskal.dunn.test(x=count, g=spray, p.adjust.method="none")
## Warning in posthoc.kruskal.dunn.test.default(x = count, g = spray,
## p.adjust.method = "none"): Ties are present. z-quantiles were corrected for
## ties.
##
## Pairwise comparisons using Dunn's-test for multiple
## comparisons of independent samples
##
## data: count and spray
##
## A B C D E
## B 0.75448 - - - -
## C 1.8e-06 3.6e-07 - - -
## D 0.00182 0.00060 0.09762 - -
## E 0.00012 3.1e-05 0.35572 0.46358 -
## F 0.68505 0.92603 2.2e-07 0.00043 2.1e-05
##
## P value adjustment method: none
posthoc.kruskal.dunn.test(x=count, g=spray, p.adjust.method="bonferroni")
## Warning in posthoc.kruskal.dunn.test.default(x = count, g = spray,
## p.adjust.method = "bonferroni"): Ties are present. z-quantiles were
## corrected for ties.
##
## Pairwise comparisons using Dunn's-test for multiple
## comparisons of independent samples
##
## data: count and spray
##
## A B C D E
## B 1.00000 - - - -
## C 2.7e-05 5.5e-06 - - -
## D 0.02735 0.00904 1.00000 - -
## E 0.00177 0.00047 1.00000 1.00000 -
## F 1.00000 1.00000 3.3e-06 0.00640 0.00031
##
## P value adjustment method: bonferroni
library(car)
data(Baumann)
head(Baumann)
## group pretest.1 pretest.2 post.test.1 post.test.2 post.test.3
## 1 Basal 4 3 5 4 41
## 2 Basal 6 5 9 5 41
## 3 Basal 9 4 5 3 43
## 4 Basal 12 6 8 5 46
## 5 Basal 16 5 10 9 46
## 6 Basal 15 13 9 8 45
str(Baumann)
## 'data.frame': 66 obs. of 6 variables:
## $ group : Factor w/ 3 levels "Basal","DRTA",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ pretest.1 : int 4 6 9 12 16 15 14 12 12 8 ...
## $ pretest.2 : int 3 5 4 6 5 13 8 7 3 8 ...
## $ post.test.1: int 5 9 5 8 10 9 12 5 8 7 ...
## $ post.test.2: int 4 5 3 5 9 8 5 5 7 7 ...
## $ post.test.3: int 41 41 43 46 46 45 45 32 33 39 ...
attach(Baumann)
is.factor(group)
## [1] TRUE
is.numeric(pretest.1)
## [1] TRUE
attach(Baumann)
## The following objects are masked from Baumann (pos = 3):
##
## group, post.test.1, post.test.2, post.test.3, pretest.1,
## pretest.2
model1<-aov(post.test.1~group,data=Baumann)
summary(model1)
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 108.1 54.06 5.317 0.00735 **
## Residuals 63 640.5 10.17
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
par(mfrow=c(2,2))
plot(model1)
#Normalidad de los residuos
shapiro.test(residuals(model1))
##
## Shapiro-Wilk normality test
##
## data: residuals(model1)
## W = 0.97734, p-value = 0.2682
leveneTest(post.test.1~group,data=Baumann)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 2.1297 0.1273
## 63
bartlett.test(post.test.1~group,data=Baumann)
##
## Bartlett test of homogeneity of variances
##
## data: post.test.1 by group
## Bartlett's K-squared = 3.7349, df = 2, p-value = 0.1545
TukeyHSD(model1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = post.test.1 ~ group, data = Baumann)
##
## $group
## diff lwr upr p adj
## DRTA-Basal 3.090909 0.7832953 5.3985228 0.0057599
## Strat-Basal 1.090909 -1.2167047 3.3985228 0.4965098
## Strat-DRTA -2.000000 -4.3076137 0.3076137 0.1021195
plot(TukeyHSD(model1))
boxplot(post.test.1~group,data=Baumann)
library(ggplot2)
ggplot(Baumann, aes(x=factor(group), y=post.test.1, fill=group)) + geom_boxplot()
data("trees")
head(trees)
## Girth Height Volume
## 1 8.3 70 10.3
## 2 8.6 65 10.3
## 3 8.8 63 10.2
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
pairs(trees,pch=16, panel = panel.smooth)
reg1 <- lm(Volume ~ Height + Girth, data=trees)
summary(reg1)
##
## Call:
## lm(formula = Volume ~ Height + Girth, data = trees)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4065 -2.6493 -0.2876 2.2003 8.4847
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -57.9877 8.6382 -6.713 2.75e-07 ***
## Height 0.3393 0.1302 2.607 0.0145 *
## Girth 4.7082 0.2643 17.816 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.882 on 28 degrees of freedom
## Multiple R-squared: 0.948, Adjusted R-squared: 0.9442
## F-statistic: 255 on 2 and 28 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(reg1)
#Parece haber ciertos problemas en el cumplimiento de los supuestos!
reg2 <- lm(log(Volume) ~ log(Height) + log(Girth), data=trees)
summary(reg2)
##
## Call:
## lm(formula = log(Volume) ~ log(Height) + log(Girth), data = trees)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.168561 -0.048488 0.002431 0.063637 0.129223
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.63162 0.79979 -8.292 5.06e-09 ***
## log(Height) 1.11712 0.20444 5.464 7.81e-06 ***
## log(Girth) 1.98265 0.07501 26.432 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08139 on 28 degrees of freedom
## Multiple R-squared: 0.9777, Adjusted R-squared: 0.9761
## F-statistic: 613.2 on 2 and 28 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(reg2)
library(visreg)
## Warning: package 'visreg' was built under R version 3.3.1
par(mfrow=c(2,2))
visreg(reg2)
library(ggplot2)#ggplot2 es una extension poderosa para graficar
ggplot(trees, aes(x=log(Height), y=log(Volume))) +
geom_point(shape=1) + # genera circulos en el grafico
geom_smooth(method=lm)
ggplot(trees, aes(x=log(Girth), y=log(Volume))) +
geom_point() +
geom_smooth(span = 0.3)
## `geom_smooth()` using method = 'loess'
library(car)
influencePlot(reg1, id.n = 3)
## StudRes Hat CookD
## 2 1.651668 0.1472096 0.1478463
## 3 1.567740 0.1768619 0.1673192
## 18 -1.859901 0.1434615 0.1775359
## 20 -1.105744 0.2112366 0.1082855
## 31 2.765603 0.2270585 0.6052326
Identificamos valores extremos en el modelo
outlierTest(reg1)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 31 2.765603 0.010122 0.31377
outlierTest(reg2)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 18 -2.32572 0.027789 0.86146
trees1<-trees[-c(2,3,18,20,31),]
trees1
## Girth Height Volume
## 1 8.3 70 10.3
## 4 10.5 72 16.4
## 5 10.7 81 18.8
## 6 10.8 83 19.7
## 7 11.0 66 15.6
## 8 11.0 75 18.2
## 9 11.1 80 22.6
## 10 11.2 75 19.9
## 11 11.3 79 24.2
## 12 11.4 76 21.0
## 13 11.4 76 21.4
## 14 11.7 69 21.3
## 15 12.0 75 19.1
## 16 12.9 74 22.2
## 17 12.9 85 33.8
## 19 13.7 71 25.7
## 21 14.0 78 34.5
## 22 14.2 80 31.7
## 23 14.5 74 36.3
## 24 16.0 72 38.3
## 25 16.3 77 42.6
## 26 17.3 81 55.4
## 27 17.5 82 55.7
## 28 17.9 80 58.3
## 29 18.0 80 51.5
## 30 18.0 80 51.0
reg3 <- lm(log(Volume) ~ log(Height) + log(Girth), data=trees1)
summary(reg3)
##
## Call:
## lm(formula = log(Volume) ~ log(Height) + log(Girth), data = trees1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.168557 -0.064486 0.008656 0.062461 0.119170
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.92713 1.14349 -6.932 4.57e-07 ***
## log(Height) 1.42237 0.27875 5.103 3.62e-05 ***
## log(Girth) 1.97364 0.08288 23.815 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08015 on 23 degrees of freedom
## Multiple R-squared: 0.9723, Adjusted R-squared: 0.9699
## F-statistic: 404.2 on 2 and 23 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(reg3)
#Criterio de selección del modelo (Akaike information criterion)
AIC(reg2,reg3)
## Warning in AIC.default(reg2, reg3): models are not all fitted to the same
## number of observations
## df AIC
## reg2 4 -62.71125
## reg3 4 -52.64131
library(BiodiversityR)
## Loading required package: tcltk
## Loading required package: vegan
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.4-1
## BiodiversityR 2.7-2: use function 'BiodiversityRGUI()' to launch the BiodiversityR Graphical User Interface
BiodiversityRGUI()
## Loading required namespace: dismo
## La interfaz R-Commander sólo funciona en sesiones interactivas
##
## Attaching package: 'Rcmdr'
## The following objects are masked from 'package:tcltk':
##
## tclvalue, tkfocus
## Fuente: BiodiversityGUI.R
beetles <- read.delim ('http://www.davidzeleny.net/anadat-r/data-download/carabid-beetles-boreal-forest.txt', row.names = 1)
#Permite transponer datos
t(beetles)->beetles2
head(beetles2)
## Calathus.micropterus Pterostichus.oblongopunctatus
## moist.primeval.forests 203 101
## dry.primeval.forests 91 18
## managed.forests 161 54
## old.plantations 29 9
## young.plantations 48 23
## Notiophilus.biguttatus Carabus.hortensis
## moist.primeval.forests 73 68
## dry.primeval.forests 29 6
## managed.forests 37 43
## old.plantations 1 0
## young.plantations 3 2
## Carabus.glabratus Cychrus.caraboides Amara.brunnea
## moist.primeval.forests 22 41 30
## dry.primeval.forests 9 8 16
## managed.forests 22 16 9
## old.plantations 1 1 0
## young.plantations 15 6 2
## Trechus.secalis Leistus.terminatus Agonum.fuliginosum
## moist.primeval.forests 11 31 0
## dry.primeval.forests 11 4 0
## managed.forests 4 8 2
## old.plantations 15 1 0
## young.plantations 30 3 0
## Agonum.mannerheimii Amara.familiaris Amara.lunicollis
## moist.primeval.forests 7 0 0
## dry.primeval.forests 0 0 0
## managed.forests 1 0 0
## old.plantations 0 0 0
## young.plantations 0 1 7
## Bembidion.gilvipes Bradycellus.caucasicus
## moist.primeval.forests 0 0
## dry.primeval.forests 0 0
## managed.forests 0 0
## old.plantations 0 0
## young.plantations 2 1
## Calathus.melanocephalus Carabus.nitens Carabus.violaceus
## moist.primeval.forests 0 0 0
## dry.primeval.forests 0 0 0
## managed.forests 0 1 0
## old.plantations 0 0 0
## young.plantations 3 1 1
## Cicindela.sylvatica Cymindis.vaporariorum Dromius.agilis
## moist.primeval.forests 0 0 0
## dry.primeval.forests 0 0 0
## managed.forests 0 0 1
## old.plantations 0 0 0
## young.plantations 10 3 0
## Dyschirius.globosus Harpalus.quadripunctatus Harpalus.sp
## moist.primeval.forests 1 1 0
## dry.primeval.forests 0 3 0
## managed.forests 0 0 0
## old.plantations 0 0 0
## young.plantations 0 7 1
## Leistus.ferrugineus Loricera.pilicornis
## moist.primeval.forests 0 0
## dry.primeval.forests 1 0
## managed.forests 0 8
## old.plantations 0 0
## young.plantations 1 0
## Miscodera.arctica Notiophilus.aestuans
## moist.primeval.forests 0 0
## dry.primeval.forests 0 0
## managed.forests 1 0
## old.plantations 0 0
## young.plantations 13 2
## Notiophilus.germinyi Notiophilus.palustris
## moist.primeval.forests 1 0
## dry.primeval.forests 0 1
## managed.forests 0 1
## old.plantations 0 0
## young.plantations 9 9
## Notiophilus.reitteri Patrobus.assimilis
## moist.primeval.forests 5 0
## dry.primeval.forests 0 0
## managed.forests 7 1
## old.plantations 0 0
## young.plantations 0 0
## Patrobus.atrorufus Pterostichus.adstrictus
## moist.primeval.forests 1 0
## dry.primeval.forests 0 0
## managed.forests 0 0
## old.plantations 0 0
## young.plantations 0 23
## Pterostichus.cupreus Pterostichus.diligens
## moist.primeval.forests 0 0
## dry.primeval.forests 2 0
## managed.forests 0 1
## old.plantations 0 2
## young.plantations 1 1
## Pterostichus.niger Pterostichus.strenuus
## moist.primeval.forests 1 0
## dry.primeval.forests 0 0
## managed.forests 3 0
## old.plantations 0 4
## young.plantations 7 4
## Synuchus.vivalis
## moist.primeval.forests 0
## dry.primeval.forests 0
## managed.forests 0
## old.plantations 0
## young.plantations 4
#Permite obtener número de espececies por sitio (Riqueza)
specnumber(beetles2)
## moist.primeval.forests dry.primeval.forests managed.forests
## 16 13 20
## old.plantations young.plantations
## 9 31
#Permite obtener la suma de las abundancias por sitio
apply(beetles2,1,sum)
## moist.primeval.forests dry.primeval.forests managed.forests
## 597 199 381
## old.plantations young.plantations
## 63 243
#base ln (logaritmo neperiano)
H<-diversity(beetles2, index="shannon")
H
## moist.primeval.forests dry.primeval.forests managed.forests
## 2.000573 1.834545 1.958269
## old.plantations young.plantations
## 1.524423 2.817914
#Indice de simpson (Dominancia)
simp <- diversity(beetles2, "simpson")
#reciproco de Simpson (Dominancia)
invsimp <- diversity(beetles2, "inv")
invsimp #Aumenta cuando la comunidad es menos equitativa
## moist.primeval.forests dry.primeval.forests managed.forests
## 5.433975 3.946288 4.386987
## old.plantations young.plantations
## 3.389411 11.370884
#Pielou´s
J <- H/log(specnumber(beetles2))
J# Pielou´s (valores>son equitativo, uniformes en las abundancias, valores< mayor dominancia, alta heterogeneidad).
## moist.primeval.forests dry.primeval.forests managed.forests
## 0.7215542 0.7152364 0.6536864
## old.plantations young.plantations
## 0.6937949 0.8205954
#Permite estimar el número de especies no observadas.
specpool(beetles2)
## Species chao chao.se jack1 jack1.se jack2 boot boot.se n
## All 39 55.2 10.35978 53.4 11.32784 60.6 45.56192 6.477809 5
#Curva de Acumulación
acr<-specaccum(beetles2, "rarefaction")
acr
## Species Accumulation Curve
## Accumulation method: rarefaction
## Call: specaccum(comm = beetles2, method = "rarefaction")
##
##
## Sites 1.0013 1.9993 3.0007 3.9987 5
## Individuals 297.0000 593.0000 890.0000 1186.0000 1483
## Richness 25.7627 31.3896 34.7565 37.1708 39
## sd 2.1465 1.9601 1.6713 1.2204 0
plot(acr, xlab = "# of samples", ylab = "# of species")
plot(acr,ci.type="poly", col="blue", lwd=2, ci.lty=0, ci.col="lightblue")
#Curva de Acumulación por sitios
rarecurve (beetles2, col="blue", cex = 0.6)
RankAbun <- rankabundance(beetles2)
RankAbun
## rank abundance proportion plower pupper accumfreq
## Calathus.micropterus 1 532 35.9 25.0 46.7 35.9
## Pterostichus.oblongopunctatus 2 205 13.8 8.9 18.7 49.7
## Notiophilus.biguttatus 3 143 9.6 3.8 15.5 59.3
## Carabus.hortensis 4 119 8.0 1.4 14.6 67.4
## Cychrus.caraboides 5 72 4.9 2.0 7.7 72.2
## Trechus.secalis 6 71 4.8 -1.8 11.4 77.0
## Carabus.glabratus 7 69 4.7 2.9 6.4 81.7
## Amara.brunnea 8 57 3.8 0.8 6.9 85.5
## Leistus.terminatus 9 47 3.2 0.3 6.1 88.7
## Pterostichus.adstrictus 10 23 1.6 -3.1 6.2 90.2
## Miscodera.arctica 11 14 0.9 -1.7 3.6 91.2
## Notiophilus.reitteri 12 12 0.8 -0.2 1.8 92.0
## Harpalus.quadripunctatus 13 11 0.7 -0.7 2.2 92.7
## Notiophilus.palustris 14 11 0.7 -1.1 2.6 93.5
## Pterostichus.niger 15 11 0.7 -0.6 2.1 94.2
## Cicindela.sylvatica 16 10 0.7 -1.4 2.7 94.9
## Notiophilus.germinyi 17 10 0.7 -1.1 2.4 95.5
## Loricera.pilicornis 18 8 0.5 -0.9 2.0 96.1
## Agonum.mannerheimii 19 8 0.5 -0.4 1.4 96.6
## Pterostichus.strenuus 20 8 0.5 -0.7 1.8 97.2
## Amara.lunicollis 21 7 0.5 -1.0 1.9 97.6
## Pterostichus.diligens 22 4 0.3 -0.3 0.8 97.9
## Synuchus.vivalis 23 4 0.3 -0.5 1.1 98.2
## Calathus.melanocephalus 24 3 0.2 -0.4 0.8 98.4
## Cymindis.vaporariorum 25 3 0.2 -0.4 0.8 98.6
## Pterostichus.cupreus 26 3 0.2 -0.3 0.7 98.8
## Agonum.fuliginosum 27 2 0.1 -0.2 0.5 98.9
## Bembidion.gilvipes 28 2 0.1 -0.3 0.5 99.1
## Carabus.nitens 29 2 0.1 -0.1 0.4 99.2
## Leistus.ferrugineus 30 2 0.1 -0.2 0.4 99.3
## Notiophilus.aestuans 31 2 0.1 -0.3 0.5 99.5
## Bradycellus.caucasicus 32 1 0.1 -0.1 0.3 99.5
## Dyschirius.globosus 33 1 0.1 -0.1 0.2 99.6
## Amara.familiaris 34 1 0.1 -0.1 0.3 99.7
## Carabus.violaceus 35 1 0.1 -0.1 0.3 99.7
## Dromius.agilis 36 1 0.1 -0.1 0.3 99.8
## Harpalus.sp 37 1 0.1 -0.1 0.3 99.9
## Patrobus.assimilis 38 1 0.1 -0.1 0.3 99.9
## Patrobus.atrorufus 39 1 0.1 -0.1 0.2 100.0
## logabun rankfreq
## Calathus.micropterus 2.7 2.6
## Pterostichus.oblongopunctatus 2.3 5.1
## Notiophilus.biguttatus 2.2 7.7
## Carabus.hortensis 2.1 10.3
## Cychrus.caraboides 1.9 12.8
## Trechus.secalis 1.9 15.4
## Carabus.glabratus 1.8 17.9
## Amara.brunnea 1.8 20.5
## Leistus.terminatus 1.7 23.1
## Pterostichus.adstrictus 1.4 25.6
## Miscodera.arctica 1.1 28.2
## Notiophilus.reitteri 1.1 30.8
## Harpalus.quadripunctatus 1.0 33.3
## Notiophilus.palustris 1.0 35.9
## Pterostichus.niger 1.0 38.5
## Cicindela.sylvatica 1.0 41.0
## Notiophilus.germinyi 1.0 43.6
## Loricera.pilicornis 0.9 46.2
## Agonum.mannerheimii 0.9 48.7
## Pterostichus.strenuus 0.9 51.3
## Amara.lunicollis 0.8 53.8
## Pterostichus.diligens 0.6 56.4
## Synuchus.vivalis 0.6 59.0
## Calathus.melanocephalus 0.5 61.5
## Cymindis.vaporariorum 0.5 64.1
## Pterostichus.cupreus 0.5 66.7
## Agonum.fuliginosum 0.3 69.2
## Bembidion.gilvipes 0.3 71.8
## Carabus.nitens 0.3 74.4
## Leistus.ferrugineus 0.3 76.9
## Notiophilus.aestuans 0.3 79.5
## Bradycellus.caucasicus 0.0 82.1
## Dyschirius.globosus 0.0 84.6
## Amara.familiaris 0.0 87.2
## Carabus.violaceus 0.0 89.7
## Dromius.agilis 0.0 92.3
## Harpalus.sp 0.0 94.9
## Patrobus.assimilis 0.0 97.4
## Patrobus.atrorufus 0.0 100.0
rankabunplot(RankAbun,scale='proportion', addit=FALSE, specnames=c(1,2,3,4))
``
library(cluster)
par(mfrwo=c(2,2))
## Warning in par(mfrwo = c(2, 2)): "mfrwo" is not a graphical parameter
ag1<-agnes(beetles2, method="single")
ag2<-agnes(beetles2, method="complete")
ag3<-agnes(beetles2, method="average")
plot(ag1)
plot(ag2)
plot(ag3)
be.dist<-vegdist(beetles2, method = "bray")
hclust <- hclust(be.dist, method = "average")
plot(hclust, ylab = "Bray-Curtis dissimilarity")
library(vegan)
data("dune")
H<-diversity(dune)
data("dune.env")
boxplot(specnumber(dune) ~ dune.env$Use, col="gray", border="blue")
pool <- specpool(dune, dune.env$Use)
pool
## Species chao chao.se jack1 jack1.se jack2 boot boot.se n
## Hayfield 27 29.33333 2.459549 33.0 3.464102 32.64286 30.32105 2.483249 7
## Haypastu 24 31.00000 6.614378 31.0 4.077377 34.42857 27.22466 2.228030 8
## Pasture 23 29.48000 5.791414 30.2 5.600000 33.35000 26.40032 3.009648 5
reg.riq<-lm(specnumber(dune) ~ dune.env$Use)
summary(reg.riq)
##
## Call:
## lm(formula = specnumber(dune) ~ dune.env$Use)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.500 -1.575 0.000 1.836 3.714
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.8619 0.5753 17.143 3.66e-12 ***
## dune.env$Use.L -0.3435 1.0447 -0.329 0.746
## dune.env$Use.Q 0.4432 0.9457 0.469 0.645
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.523 on 17 degrees of freedom
## Multiple R-squared: 0.021, Adjusted R-squared: -0.09418
## F-statistic: 0.1823 on 2 and 17 DF, p-value: 0.8349
reg.div<-lm(H ~ dune.env$Use)
summary(reg.div)
##
## Call:
## lm(formula = H ~ dune.env$Use)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66613 -0.15333 0.00045 0.24641 0.32016
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.16626 0.06398 33.859 <2e-16 ***
## dune.env$Use.L -0.04080 0.11618 -0.351 0.730
## dune.env$Use.Q 0.07305 0.10517 0.695 0.497
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2806 on 17 degrees of freedom
## Multiple R-squared: 0.03765, Adjusted R-squared: -0.07556
## F-statistic: 0.3326 on 2 and 17 DF, p-value: 0.7216
library(vegan)
data(dune.env)
data(dune)
dune.env$site.totals <- apply(dune,1,sum)
Accum.1 <- accumresult(dune, y=dune.env, scale='site.totals', method='exact', conditioned=TRUE)
Accum.1
## Species Accumulation Curve
## Accumulation method: exact
## Call: specaccum(comm = x, method = method, permutations = permutations, conditioned = conditioned, gamma = gamma)
##
##
## Sites 34.2500 68.5000 102.7500 137.0000 171.2500 205.5000 239.7500 274.0000
## Richness 9.8500 15.1105 18.5105 20.9375 22.7543 24.1496 25.2396 26.1035
## sd 2.3511 1.8764 1.5723 1.4470 1.3902 1.3530 1.3165 1.2749
##
## Sites 308.2500 342.5000 376.7500 411.0000 445.2500 479.5000 513.7500
## Richness 26.7982 27.3650 27.8340 28.2275 28.5620 28.8496 29.0996
## sd 1.2282 1.1763 1.1193 1.0565 0.9874 0.9116 0.8287
##
## Sites 548.0000 582.2500 616.5000 650.7500 685
## Richness 29.3191 29.5140 29.6895 29.8500 30
## sd 0.7381 0.6334 0.5140 0.3571 0
accumplot(Accum.1)
accumcomp(dune, y=dune.env, factor='Management', method='exact', legend=FALSE, conditioned=TRUE)
## , , = Sites
##
## obs
## Management 1 2 3 4 5 6
## BF 1 2 3 NA NA NA
## HF 1 2 3 4 5 NA
## NM 1 2 3 4 5 6
## SF 1 2 3 4 5 6
##
## , , = Richness
##
## obs
## Management 1 2 3 4 5 6
## BF 10.333333 14.33333 16.00 NA NA NA
## HF 12.600000 16.70000 19.10 20.40000 21.00000 NA
## NM 8.000000 12.86667 16.20 18.46667 20.00000 21
## SF 9.166667 13.60000 16.25 18.13333 19.66667 21
##
## , , = sd
##
## obs
## Management 1 2 3 4 5 6
## BF 1.2472191 0.4714045 0.000000 NA NA NA
## HF 1.0198039 1.3343708 1.732051 0.800000 0.000000 NA
## NM 0.8164966 1.0011583 1.346070 1.642761 1.154701 0
## SF 2.4094720 1.4282970 1.225156 1.231079 0.942809 0