una vez que estÔn instalados deben activarse los paquetes con la función library en R:
library(readxl) # para leer desde Excel
library(ggplot2)
library(dplyr)
library(multcomp) # para Tukey
library(EnvStats)
semillas <- read.csv("data/LabEcoPob variacion poblacional 20210826 - datos_masa_long.csv")
head(semillas)
## grupo arbol fruto semilla long.mm masa.g n.sem.fruto
## 1 A 1 1 1 10.8 0.258 7
## 2 A 1 1 2 11.9 0.231 7
## 3 A 1 1 3 11.0 0.289 7
## 4 A 1 1 4 11.2 0.273 7
## 5 A 1 1 5 11.0 0.266 7
## 6 A 1 1 6 10.9 0.244 7
variables_num <- c("long.mm", "masa.g")
summary(semillas[ ,variables_num])
## long.mm masa.g
## Min. : 8.600 Min. :0.0500
## 1st Qu.: 9.775 1st Qu.:0.1780
## Median :10.200 Median :0.1940
## Mean :10.484 Mean :0.2045
## 3rd Qu.:11.000 3rd Qu.:0.2440
## Max. :13.200 Max. :0.3470
table(semillas$arbol)
##
## 1 2
## 75 137
#función para definir estadĆsticos a calcular
mystats <- function(x){
m <- mean(x)
md <- median(x)
n <- length(x)
s <- sd(x)
se <- sd(x)/sqrt(length(x))
skew <- sum((x-m)^3/s^3)/n
kurt <- sum((x-m)^4/s^4)/n - 3
return(c(n=n, mean=m, median=md, stdev=s, serror=se, skew=skew, kurtosis=kurt))
}
# función sapply con split para aplicar la función a datos de cada Ôrbol
sapply(split(semillas$long.mm, semillas$arbol), mystats)
## 1 2
## n 75.0000000 137.00000000
## mean 11.5840000 9.88175182
## median 11.6000000 9.90000000
## stdev 0.9109009 0.55574790
## serror 0.1051818 0.04748075
## skew -0.2942260 0.02404127
## kurtosis -0.1749803 -0.16067636
INFORME: hacerlo para masa
# histograma y curva normal para cada arbol; variable longitud
# Ɣrbol 1
arbol1 <- semillas %>%
filter(arbol == "1")
msa1 <- semillas %>%
filter(arbol == "1") %>%
summarise(means = mean(long.mm), sd=sd(long.mm))
ggplot(arbol1, aes(x=long.mm)) +
geom_histogram(aes(y=..density..), position="identity", binwidth=.25, color="red", fill="gray") +
stat_function(fun = dnorm, color="red", size=1, args=list(mean=msa1$means, sd=msa1$sd)) +
labs(x="Longitud semilla, mm", y="densidad") +
annotate("text", x = 11, y = 1, label = "Arbol 1")
# Ɣrbol 2
arbol2 <- semillas %>%
filter(arbol == "2")
msa2 <- semillas %>%
filter(arbol == "2") %>%
summarise(means = mean(long.mm), sd=sd(long.mm))
ggplot(arbol2, aes(x=long.mm)) +
geom_histogram(aes(y=..density..), position="identity", binwidth=.25, color="red", fill="gray") +
stat_function(fun = dnorm, color="red", size=1, args=list(mean=msa2$means, sd=msa2$sd)) +
labs(x="Longitud semilla, mm", y="densidad") +
annotate("text", x = 11, y = 1, label = "Arbol 2")
#las dos curvas normales juntas
ggplot(semillas, aes(x=long.mm)) +
stat_function(fun = dnorm, args=list(mean=msa1$means, sd=msa1$sd), aes(colour = "Arbol 1"), size=1.5) +
stat_function(fun = dnorm, args=list(mean=msa2$means, sd=msa2$sd), aes(colour = "Arbol 2"), size=1.5) +
scale_colour_manual("T. populnea", values = c("blue","green")) +
labs(x="Longitud semilla, mm", y="densidad")
INFORME: hacerlo para masa ambos Ɣrboles
long <- ggplot(semillas, aes(x = arbol, y=long.mm, group=arbol)) + geom_boxplot(fill="cornflowerblue") +
stat_summary(aes(group=arbol), fun=mean, colour="darkred", geom="point", shape=18, size=3) +
labs(x = "Arbol", y = "Longitud semilla, mm")
long
INFORME: hacerlo para masa
#Q-Q plot
EnvStats::qqPlot(semillas$long.mm, add.line = TRUE, points.col = "blue", line.col = "red")
#prueba Shapiro-Wilk
shapiro.test(semillas$long.mm)
##
## Shapiro-Wilk normality test
##
## data: semillas$long.mm
## W = 0.94648, p-value = 4.524e-07
bartlett.test(long.mm ~ arbol, data=semillas)
##
## Bartlett test of homogeneity of variances
##
## data: long.mm by arbol
## Bartlett's K-squared = 24.688, df = 1, p-value = 6.741e-07
INFORME hacerlo para masa
analisis <- aov(long.mm ~ as.factor(arbol), data=semillas)
summary(analisis)
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(arbol) 1 140.4 140.44 285.2 <2e-16 ***
## Residuals 210 103.4 0.49
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
INFORME: hacerlo para masa
kruskal.test(long.mm ~ arbol, data = semillas)
##
## Kruskal-Wallis rank sum test
##
## data: long.mm by arbol
## Kruskal-Wallis chi-squared = 114.37, df = 1, p-value < 2.2e-16
INFORME: hacerlo para masa
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:EnvStats':
##
## qqPlot
## The following object is masked from 'package:dplyr':
##
## recode
# regresión lineal simple
rls_1 <- lm(long.mm ~ masa.g, data=semillas)
summary(rls_1)
##
## Call:
## lm(formula = long.mm ~ masa.g, data = semillas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1408 -0.5720 -0.0949 0.4670 3.1649
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.0937 0.2272 35.63 <2e-16 ***
## masa.g 11.6869 1.0723 10.90 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8612 on 210 degrees of freedom
## Multiple R-squared: 0.3613, Adjusted R-squared: 0.3582
## F-statistic: 118.8 on 1 and 210 DF, p-value: < 2.2e-16
plot(long.mm ~ masa.g, data=semillas)
abline(rls_1)
#residuales
spreadLevelPlot(rls_1)
##
## Suggested power transformation: 0.5798792
rls_2 <- lm(long.mm ~ n.sem.fruto, data=semillas)
summary(rls_2)
##
## Call:
## lm(formula = long.mm ~ n.sem.fruto, data = semillas)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.58927 -0.60446 -0.00348 0.64064 2.01465
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.84123 0.17844 66.359 < 2e-16 ***
## n.sem.fruto -0.12598 0.01544 -8.158 3.09e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.939 on 210 degrees of freedom
## Multiple R-squared: 0.2406, Adjusted R-squared: 0.237
## F-statistic: 66.55 on 1 and 210 DF, p-value: 3.094e-14
plot(long.mm ~ n.sem.fruto, data=semillas)
abline(rls_2)
#residuales
spreadLevelPlot(rls_2)
##
## Suggested power transformation: -7.46316
INFORME: hacerlo para masa