una vez que estÔn instalados deben activarse los paquetes con la función library en R:
library(readxl)
library(ggplot2)
library(dplyr)
library(multcomp)
library(EnvStats)
library(agricolae)
Cargar los datos en el sistema, para los anĆ”lisis. El archivo Excel debe estar en el mismo āfolderā que el archivo del documento .Rmd; este folder deberĆa ser su directorio ādefaultā en RStudio.
semillas <- read_excel("LabEcoPob variacion poblacional 2016 2020.xlsx",
sheet = "matriz")
head(semillas)
## # A tibble: 6 x 7
## arbol grupo fruto semilla L masa nsemillas
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 1 1 1 10.5 0.29 5
## 2 1 1 1 2 10.3 0.25 5
## 3 1 1 1 3 10.1 0.27 5
## 4 1 1 1 4 10.1 0.26 5
## 5 1 1 1 5 10.3 0.24 5
## 6 1 1 2 1 10.4 0.28 9
Basadas en las fuentes de variación:
A continuación se muestran ejemplos del uso de diversas funciones y pruebas estadĆsticas, utilizando la variable L (longitud de semilla, en mm); para su informe deberĆ” utilizar tambiĆ©n la variable masa (masa de cada semilla, g).
# summary de todas las variables
summary(semillas)
## arbol grupo fruto semilla L
## Min. :1.000 Min. :1.00 Min. :1.000 Min. : 1.00 Min. : 7.10
## 1st Qu.:1.000 1st Qu.:2.00 1st Qu.:1.000 1st Qu.: 2.00 1st Qu.: 9.30
## Median :2.000 Median :3.00 Median :1.000 Median : 4.00 Median :10.10
## Mean :2.103 Mean :2.87 Mean :1.462 Mean : 4.65 Mean :10.27
## 3rd Qu.:3.000 3rd Qu.:4.00 3rd Qu.:2.000 3rd Qu.: 6.50 3rd Qu.:11.00
## Max. :3.000 Max. :5.00 Max. :2.000 Max. :13.00 Max. :13.50
## masa nsemillas
## Min. :0.0320 Min. : 3.0
## 1st Qu.:0.2010 1st Qu.: 7.0
## Median :0.2200 Median : 8.0
## Mean :0.2225 Mean : 8.3
## 3rd Qu.:0.2500 3rd Qu.:10.0
## Max. :0.3280 Max. :13.0
# summary escogiendo variables
variables_num <- c("L", "masa")
summary(semillas[ ,variables_num])
## L masa
## Min. : 7.10 Min. :0.0320
## 1st Qu.: 9.30 1st Qu.:0.2010
## Median :10.10 Median :0.2200
## Mean :10.27 Mean :0.2225
## 3rd Qu.:11.00 3rd Qu.:0.2500
## Max. :13.50 Max. :0.3280
# table para frecuencia por Ɣrbol
table(semillas$arbol)
##
## 1 2 3
## 60 80 83
#función para definir estadĆsticos a calcular
mystats <- function(x){
m <- mean(x)
md <- median(x)
n <- length(x)
s <- sd(x)
se <- sd(x)/sqrt(length(x))
skew <- sum((x-m)^3/s^3)/n
kurt <- sum((x-m)^4/s^4)/n - 3
return(c(n=n, mean=m, median=md, stdev=s, serror=se, skew=skew, kurtosis=kurt))
}
# función sapply con split para aplicar la función a datos de cada Ôrbol
sapply(split(semillas$L, semillas$arbol), mystats)
## 1 2 3
## n 60.0000000 80.00000000 83.00000000
## mean 11.8165000 10.01662500 9.40481928
## median 11.9000000 10.00500000 9.50000000
## stdev 0.8889316 0.75782658 0.80802433
## serror 0.1147606 0.08472759 0.08869219
## skew -0.2814080 0.29338334 -0.54568049
## kurtosis -0.9816538 -0.52665455 0.28381366
#histograma y curva normal para cada arbol; variable longitud
#arbol 1
arbol1 <- semillas %>%
filter(arbol == "1")
msa1 <- semillas %>%
filter(arbol == "1") %>%
summarise(means = mean(L), sd=sd(L))
ggplot(arbol1, aes(x=L)) +
geom_histogram(aes(y=..density..), position="identity", binwidth=.25, color="red", fill="gray") +
stat_function(fun = dnorm, color="red", size=1, args=list(mean=msa1$means, sd=msa1$sd)) +
labs(x="Longitud semilla, mm", y="densidad") +
annotate("text", x = 11, y = 1, label = "Arbol 1")
#arbol 2
arbol2 <- semillas %>%
filter(arbol == "2")
msa2 <- semillas %>%
filter(arbol == "2") %>%
summarise(means = mean(L), sd=sd(L))
ggplot(arbol2, aes(x=L)) +
geom_histogram(aes(y=..density..), position="identity", binwidth=.25, color="blue", fill="gray") +
stat_function(fun = dnorm, color="blue", size=1, args=list(mean=msa2$means, sd=msa2$sd)) +
labs(x="Longitud semilla, mm", y="densidad") +
annotate("text", x = 11, y = 1, label = "Arbol 2")
#arbol 3
arbol3 <- semillas %>%
filter(arbol == "3")
msa3 <- semillas %>%
filter(arbol == "3") %>%
summarise(means = mean(L), sd=sd(L))
ggplot(arbol3, aes(x=L)) +
geom_histogram(aes(y=..density..), position="identity", binwidth=.25, color="green", fill="gray") +
stat_function(fun = dnorm, color="green", size=1, args=list(mean=msa3$means, sd=msa3$sd)) +
labs(x="Longitud semilla, mm", y="densidad") +
annotate("text", x = 11, y = 1, label = "Arbol 3")
#las tres curvas normales juntas
ggplot(semillas, aes(x=L)) +
stat_function(fun = dnorm, args=list(mean=msa1$means, sd=msa1$sd), aes(colour = "Arbol 1"), size=1.5) +
stat_function(fun = dnorm, args=list(mean=msa2$means, sd=msa2$sd), aes(colour = "Arbol 2"), size=1.5) +
stat_function(fun = dnorm, args=list(mean=msa3$means, sd=msa3$sd), aes(colour = "Arbol 3"), size=1.5) +
scale_colour_manual("T. populnea", values = c("red","blue","green")) +
labs(x="Longitud semilla, mm", y="densidad")
long <- ggplot(semillas, aes(x = arbol, y=L, group=arbol)) + geom_boxplot(fill="cornflowerblue") +
stat_summary(aes(group=arbol), fun.y=mean, colour="darkred", geom="point", shape=18, size=3) +
labs(x = "Arbol", y = "Longitud semilla, mm")
## Warning: `fun.y` is deprecated. Use `fun` instead.
long
library(EnvStats)
#Q-Q plot
qqPlot(semillas$L, add.line = TRUE, points.col = "blue", line.col = "red")
#prueba Shapiro-Wilk
shapiro.test(semillas$L)
##
## Shapiro-Wilk normality test
##
## data: semillas$L
## W = 0.97068, p-value = 0.0001398
bartlett.test(L ~ arbol, data=semillas)
##
## Bartlett test of homogeneity of variances
##
## data: L by arbol
## Bartlett's K-squared = 1.7397, df = 2, p-value = 0.419
analisis <- aov(L ~ as.factor(arbol), data=semillas)
summary(analisis)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(arbol) 2 210.8 105.38 159.3 <2e-16 ***
## Residuals 220 145.5 0.66
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(agricolae)
posthoc <- TukeyHSD(analisis)
posthoc
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = L ~ as.factor(arbol), data = semillas)
##
## $`as.factor(arbol)`
## diff lwr upr p adj
## 2-1 -1.7998750 -2.1276397 -1.4721103 0.0e+00
## 3-1 -2.4116807 -2.7368969 -2.0864646 0.0e+00
## 3-2 -0.6118057 -0.9125023 -0.3111091 8.7e-06
#grƔfica
par(cex = 0.6)
plot(posthoc)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:EnvStats':
##
## qqPlot
## The following object is masked from 'package:dplyr':
##
## recode
# regresión lineal simple
rls_1 <- lm(L ~ masa, data=semillas)
summary(rls_1)
##
## Call:
## lm(formula = L ~ masa, data = semillas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2061 -0.7771 -0.1554 0.6326 3.4516
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.1641 0.3301 21.704 <2e-16 ***
## masa 13.9740 1.4485 9.647 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.065 on 221 degrees of freedom
## Multiple R-squared: 0.2963, Adjusted R-squared: 0.2931
## F-statistic: 93.07 on 1 and 221 DF, p-value: < 2.2e-16
plot(L ~ masa, data=semillas)
abline(rls_1)
#residuales
spreadLevelPlot(rls_1)
##
## Suggested power transformation: 1.693059
rls_3 <- lm(L ~ nsemillas, data=semillas)
summary(rls_3)
##
## Call:
## lm(formula = L ~ nsemillas, data = semillas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5886 -0.8110 -0.0334 0.7390 3.0631
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.31793 0.28247 40.068 < 2e-16 ***
## nsemillas -0.12587 0.03255 -3.866 0.000145 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.229 on 221 degrees of freedom
## Multiple R-squared: 0.06336, Adjusted R-squared: 0.05912
## F-statistic: 14.95 on 1 and 221 DF, p-value: 0.0001453
plot(L ~ nsemillas, data=semillas)
abline(rls_3)
#residuales
spreadLevelPlot(rls_3)
##
## Suggested power transformation: -10.89188