setwd("G:/Mi unidad/Agrosavia/SOCODEVI/Informe final")
datos<-read.table("porcentaje_grano.csv", header=T, sep=';')
datos$Vereda<-as.factor(datos$Vereda)
library(ggplot2)
library(Rmisc)
## Cargando paquete requerido: lattice
## Cargando paquete requerido: plyr
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange() masks plyr::arrange()
## ✖ purrr::compact() masks plyr::compact()
## ✖ dplyr::count() masks plyr::count()
## ✖ dplyr::desc() masks plyr::desc()
## ✖ dplyr::failwith() masks plyr::failwith()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::id() masks plyr::id()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::mutate() masks plyr::mutate()
## ✖ dplyr::rename() masks plyr::rename()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
##
## Adjuntando el paquete: 'ggpubr'
##
## The following object is masked from 'package:plyr':
##
## mutate
library(rstatix)
##
## Adjuntando el paquete: 'rstatix'
##
## The following objects are masked from 'package:plyr':
##
## desc, mutate
##
## The following object is masked from 'package:stats':
##
## filter
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
##Summary statistics
summ<-datos %>%
group_by(Vereda) %>%
get_summary_stats(perc_dry, type = "mean_sd")
summ %>% as_tibble() %>% print(n=Inf)
## # A tibble: 3 × 5
## Vereda variable n mean sd
## <fct> <fct> <dbl> <dbl> <dbl>
## 1 Caño Baul perc_dry 7 -26.6 16.0
## 2 Mataredonda perc_dry 9 -9.98 3.88
## 3 Mira Valle perc_dry 9 -24.2 8.06
##Visualization
bxp <- ggboxplot(datos, x = "Vereda", y = "perc_dry",
palette = "jco"
)
bxp

##Check assumptions
##Outliers
datos %>%
group_by(Vereda) %>%
identify_outliers(perc_dry)
## [1] Vereda perc_wet perc_dry2 perc_dry Cd_cont is.outlier is.extreme
## <0 rows> (o 0- extensión row.names)
##Normality assumption
##Compute Shapiro-Wilk test for each combinations of factor levels:
norm<-datos %>%
group_by(Vereda) %>%
shapiro_test(perc_dry)
norm %>% as_tibble() %>% print(n=Inf)
## # A tibble: 3 × 4
## Vereda variable statistic p
## <fct> <chr> <dbl> <dbl>
## 1 Caño Baul perc_dry 0.936 0.607
## 2 Mataredonda perc_dry 0.939 0.574
## 3 Mira Valle perc_dry 0.937 0.551
##Create QQ plot for each cell of design:
ggqqplot(datos, "perc_dry", ggtheme = theme_bw()) +
facet_grid(~ Vereda, labeller = "label_both")

##Homogneity of variance assumption
##Compute the Levene’s test at each level of the within-subjects factor, here time variable:
lev<-datos %>%
levene_test(perc_dry ~ Vereda)
lev %>% as_tibble() %>% print(n=Inf)
## # A tibble: 1 × 4
## df1 df2 statistic p
## <int> <int> <dbl> <dbl>
## 1 2 22 7.68 0.00296
##Computation
#Table by error
res.aov.error <- aov(perc_dry ~ Vereda, datos)
res.aov.error
## Call:
## aov(formula = perc_dry ~ Vereda, data = datos)
##
## Terms:
## Vereda Residuals
## Sum of Squares 1365.43 2185.48
## Deg. of Freedom 2 22
##
## Residual standard error: 9.966946
## Estimated effects may be unbalanced
summary(res.aov.error)
## Df Sum Sq Mean Sq F value Pr(>F)
## Vereda 2 1365 682.7 6.873 0.0048 **
## Residuals 22 2186 99.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Emmeans
emmip(res.aov.error, ~Vereda, CIs = T, xlab="Vereda", ylab="Almendra Cd [%]")

emm_Vereda <- emmeans(res.aov.error, pairwise ~ Vereda)
emm_Vereda
## $emmeans
## Vereda emmean SE df lower.CL upper.CL
## Caño Baul -26.59 3.77 22 -34.4 -18.78
## Mataredonda -9.98 3.32 22 -16.9 -3.09
## Mira Valle -24.20 3.32 22 -31.1 -17.31
##
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## Caño Baul - Mataredonda -16.6 5.02 22 -3.308 0.0086
## Caño Baul - Mira Valle -2.4 5.02 22 -0.477 0.8827
## Mataredonda - Mira Valle 14.2 4.70 22 3.027 0.0164
##
## P value adjustment: tukey method for comparing a family of 3 estimates