setwd("G:/Mi unidad/Agrosavia/SOCODEVI/Informe final")
datos<-read.table("porcentaje_grano.csv", header=T, sep=';')
datos$Vereda<-as.factor(datos$Vereda)
library(ggplot2)
library(Rmisc)
## Cargando paquete requerido: lattice
## Cargando paquete requerido: plyr
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange()   masks plyr::arrange()
## ✖ purrr::compact()   masks plyr::compact()
## ✖ dplyr::count()     masks plyr::count()
## ✖ dplyr::desc()      masks plyr::desc()
## ✖ dplyr::failwith()  masks plyr::failwith()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::id()        masks plyr::id()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::mutate()    masks plyr::mutate()
## ✖ dplyr::rename()    masks plyr::rename()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
## 
## Adjuntando el paquete: 'ggpubr'
## 
## The following object is masked from 'package:plyr':
## 
##     mutate
library(rstatix)
## 
## Adjuntando el paquete: 'rstatix'
## 
## The following objects are masked from 'package:plyr':
## 
##     desc, mutate
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
##Summary statistics
summ<-datos %>%
  group_by(Vereda) %>%
  get_summary_stats(perc_dry, type = "mean_sd")
summ %>% as_tibble() %>% print(n=Inf)
## # A tibble: 3 × 5
##   Vereda      variable     n   mean    sd
##   <fct>       <fct>    <dbl>  <dbl> <dbl>
## 1 Caño Baul   perc_dry     7 -26.6  16.0 
## 2 Mataredonda perc_dry     9  -9.98  3.88
## 3 Mira Valle  perc_dry     9 -24.2   8.06
##Visualization
bxp <- ggboxplot(datos, x = "Vereda", y = "perc_dry",
  palette = "jco"
)
bxp

##Check assumptions
##Outliers

datos %>%
  group_by(Vereda) %>%
  identify_outliers(perc_dry)
## [1] Vereda     perc_wet   perc_dry2  perc_dry   Cd_cont    is.outlier is.extreme
## <0 rows> (o 0- extensión row.names)
##Normality assumption
##Compute Shapiro-Wilk test for each combinations of factor levels:

norm<-datos %>%
  group_by(Vereda) %>%
  shapiro_test(perc_dry)
norm %>% as_tibble() %>% print(n=Inf)
## # A tibble: 3 × 4
##   Vereda      variable statistic     p
##   <fct>       <chr>        <dbl> <dbl>
## 1 Caño Baul   perc_dry     0.936 0.607
## 2 Mataredonda perc_dry     0.939 0.574
## 3 Mira Valle  perc_dry     0.937 0.551
##Create QQ plot for each cell of design:

ggqqplot(datos, "perc_dry", ggtheme = theme_bw()) +
  facet_grid(~ Vereda, labeller = "label_both")

##Homogneity of variance assumption

##Compute the Levene’s test at each level of the within-subjects factor, here time variable:

lev<-datos %>%
  levene_test(perc_dry ~ Vereda)
lev %>% as_tibble() %>% print(n=Inf)
## # A tibble: 1 × 4
##     df1   df2 statistic       p
##   <int> <int>     <dbl>   <dbl>
## 1     2    22      7.68 0.00296
##Computation

#Table by error
res.aov.error <- aov(perc_dry ~ Vereda, datos)
res.aov.error
## Call:
##    aov(formula = perc_dry ~ Vereda, data = datos)
## 
## Terms:
##                  Vereda Residuals
## Sum of Squares  1365.43   2185.48
## Deg. of Freedom       2        22
## 
## Residual standard error: 9.966946
## Estimated effects may be unbalanced
summary(res.aov.error)
##             Df Sum Sq Mean Sq F value Pr(>F)   
## Vereda       2   1365   682.7   6.873 0.0048 **
## Residuals   22   2186    99.3                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Emmeans
emmip(res.aov.error, ~Vereda, CIs = T, xlab="Vereda", ylab="Almendra Cd [%]")

emm_Vereda <- emmeans(res.aov.error, pairwise ~ Vereda)
emm_Vereda
## $emmeans
##  Vereda      emmean   SE df lower.CL upper.CL
##  Caño Baul   -26.59 3.77 22    -34.4   -18.78
##  Mataredonda  -9.98 3.32 22    -16.9    -3.09
##  Mira Valle  -24.20 3.32 22    -31.1   -17.31
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                 estimate   SE df t.ratio p.value
##  Caño Baul - Mataredonda     -16.6 5.02 22  -3.308  0.0086
##  Caño Baul - Mira Valle       -2.4 5.02 22  -0.477  0.8827
##  Mataredonda - Mira Valle     14.2 4.70 22   3.027  0.0164
## 
## P value adjustment: tukey method for comparing a family of 3 estimates