setwd("G:/Mi unidad/Agrosavia/SOCODEVI/Informe final")
datos<-read.table("porcentaje_testa.csv", header=T, sep=';')
datos$Vereda<-as.factor(datos$Vereda)
library(ggplot2)
library(Rmisc)
## Cargando paquete requerido: lattice
## Cargando paquete requerido: plyr
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange()   masks plyr::arrange()
## ✖ purrr::compact()   masks plyr::compact()
## ✖ dplyr::count()     masks plyr::count()
## ✖ dplyr::desc()      masks plyr::desc()
## ✖ dplyr::failwith()  masks plyr::failwith()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::id()        masks plyr::id()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::mutate()    masks plyr::mutate()
## ✖ dplyr::rename()    masks plyr::rename()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
## 
## Adjuntando el paquete: 'ggpubr'
## 
## The following object is masked from 'package:plyr':
## 
##     mutate
library(rstatix)
## 
## Adjuntando el paquete: 'rstatix'
## 
## The following objects are masked from 'package:plyr':
## 
##     desc, mutate
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
##Summary statistics
summ<-datos %>%
  group_by(Vereda) %>%
  get_summary_stats(perc_dry, type = "mean_sd")
summ %>% as_tibble() %>% print(n=Inf)
## # A tibble: 3 × 5
##   Vereda      variable     n  mean    sd
##   <fct>       <fct>    <dbl> <dbl> <dbl>
## 1 Caño Baul   perc_dry     9 169.  113. 
## 2 Mataredonda perc_dry     9  71.5  25.2
## 3 Mira Valle  perc_dry     9 140.   43.2
##Visualization
bxp <- ggboxplot(datos, x = "Vereda", y = "perc_dry",
                 palette = "jco"
)
bxp

##Check assumptions
##Outliers

datos %>%
  group_by(Vereda) %>%
  identify_outliers(perc_dry)
## # A tibble: 2 × 7
##   Vereda      perc_wet perc_dry2 perc_dry Cd_cont is.outlier is.extreme
##   <fct>          <dbl>     <dbl>    <dbl> <chr>   <lgl>      <lgl>     
## 1 Caño Baul       4.20      4.00     400. Medio   TRUE       FALSE     
## 2 Mataredonda     1.33      1.19     119. Alto    TRUE       FALSE
##Normality assumption
##Compute Shapiro-Wilk test for each combinations of factor levels:

norm<-datos %>%
  group_by(Vereda) %>%
  shapiro_test(perc_dry)
norm %>% as_tibble() %>% print(n=Inf)
## # A tibble: 3 × 4
##   Vereda      variable statistic      p
##   <fct>       <chr>        <dbl>  <dbl>
## 1 Caño Baul   perc_dry     0.896 0.230 
## 2 Mataredonda perc_dry     0.956 0.759 
## 3 Mira Valle  perc_dry     0.851 0.0768
##Create QQ plot for each cell of design:

ggqqplot(datos, "perc_dry", ggtheme = theme_bw()) +
  facet_grid(~ Vereda, labeller = "label_both")

##Homogneity of variance assumption

##Compute the Levene’s test at each level of the within-subjects factor, here time variable:

lev<-datos %>%
  levene_test(perc_dry ~ Vereda)
lev %>% as_tibble() %>% print(n=Inf)
## # A tibble: 1 × 4
##     df1   df2 statistic      p
##   <int> <int>     <dbl>  <dbl>
## 1     2    24      4.48 0.0222
##Computation

#Table by error
res.aov.error <- aov(perc_dry ~ Vereda, datos)
res.aov.error
## Call:
##    aov(formula = perc_dry ~ Vereda, data = datos)
## 
## Terms:
##                    Vereda Residuals
## Sum of Squares   44927.52 121613.64
## Deg. of Freedom         2        24
## 
## Residual standard error: 71.18451
## Estimated effects may be unbalanced
summary(res.aov.error)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Vereda       2  44928   22464   4.433  0.023 *
## Residuals   24 121614    5067                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Emmeans
emmip(res.aov.error, ~Vereda, CIs = T, xlab="Vereda", ylab="Testa Cd [%]")

emm_Vereda <- emmeans(res.aov.error, pairwise ~ Vereda)
emm_Vereda
## $emmeans
##  Vereda      emmean   SE df lower.CL upper.CL
##  Caño Baul    168.9 23.7 24    120.0      218
##  Mataredonda   71.5 23.7 24     22.6      121
##  Mira Valle   139.6 23.7 24     90.6      189
## 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                 estimate   SE df t.ratio p.value
##  Caño Baul - Mataredonda      97.4 33.6 24   2.902  0.0206
##  Caño Baul - Mira Valle       29.4 33.6 24   0.875  0.6607
##  Mataredonda - Mira Valle    -68.0 33.6 24  -2.027  0.1273
## 
## P value adjustment: tukey method for comparing a family of 3 estimates