library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggpubr)
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(performance)
library(report)
## report is in alpha - help us improve by reporting bugs on github.com/easystats/report/issues
library(stringi)
library(ggrepel)
library(emmeans)
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## 
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
## 
##     cluster
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
library(TukeyC)
## Loading required package: doBy
## 
## Attaching package: 'doBy'
## The following object is masked from 'package:dplyr':
## 
##     order_by
y <- c(2370,
       1687,
       2592,
       2283,
       2910,
       3020,
       1282,
       1527,
       871,
       1025,
       825,
       920,
       562,
       321,
       636,
       317,
       485,
       842,
       173,
       127,
       132,
       150,
       129,
       227,
       193,
       71,
       82,
       62,
       96,
       44)


tr <- data.frame(trat = factor(rep(LETTERS[1:5], each = 6)), resp = y)

tr 
##    trat resp
## 1     A 2370
## 2     A 1687
## 3     A 2592
## 4     A 2283
## 5     A 2910
## 6     A 3020
## 7     B 1282
## 8     B 1527
## 9     B  871
## 10    B 1025
## 11    B  825
## 12    B  920
## 13    C  562
## 14    C  321
## 15    C  636
## 16    C  317
## 17    C  485
## 18    C  842
## 19    D  173
## 20    D  127
## 21    D  132
## 22    D  150
## 23    D  129
## 24    D  227
## 25    E  193
## 26    E   71
## 27    E   82
## 28    E   62
## 29    E   96
## 30    E   44
myColors <- get_palette(palette = "npg", length(levels(tr$trat)))
names(myColors) <- levels(tr$trat)

custom_colors_fill <- scale_fill_manual(name = "Trat", values = myColors) 
custom_colors_color <- scale_color_manual(name = "Trat", values = myColors)



tr %>% ggbarplot(x = "trat",
                   y = "resp",
                   fill = "trat",
                   position = position_dodge(),
                   add = "mean_sd") + 
          custom_colors_fill + theme_bw(15)

tr.av <- aov(resp ~ trat, data = tr)
anova(tr.av)
## Analysis of Variance Table
## 
## Response: resp
##           Df   Sum Sq Mean Sq F value    Pr(>F)    
## trat       4 23145259 5786315   81.79 5.505e-14 ***
## Residuals 25  1768643   70746                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cv(tr.av)
##    cv 
## 30.74
tr.av %>% report
## For one-way between subjects designs, partial eta squared is equvilant to eta squared.
## Returning eta squared.
## The ANOVA (formula: resp ~ trat) suggests that:
## 
##   - The main effect of trat is significant and large (F(4, 25) = 81.79, p < .001; Eta2 = 0.93, 90% CI [0.88, 0.95])
## 
## Effect sizes were labelled following Field's (2013) recommendations.
TukeyC(tr.av) %>% summary
## Goups of means at sig.level = 0.05 
##     Means G1 G2 G3
## A 2477.00  a      
## B 1075.00     b   
## C  527.17        c
## D  156.33        c
## E   91.33        c
## 
## Matrix of the difference of means above diagonal and
## respective p-values of the Tukey test below diagonal values
##   A        B        C        D        E
## A 0 1402.000 1949.833 2320.667 2385.667
## B 0    0.000  547.833  918.667  983.667
## C 0    0.012    0.000  370.833  435.833
## D 0    0.000    0.144    0.000   65.000
## E 0    0.000    0.062    0.993    0.000
shapiro.test(tr.av$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  tr.av$residuals
## W = 0.89608, p-value = 0.006742
hist(tr.av$residuals)

bartlett.test(tr.av$residuals ~ tr$tra)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  tr.av$residuals by tr$tra
## Bartlett's K-squared = 29.586, df = 4, p-value = 5.942e-06
fligner.test(tr.av$residuals ~ tr$tra)
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  tr.av$residuals by tr$tra
## Fligner-Killeen:med chi-squared = 15.408, df = 4, p-value = 0.003925
leastsquare = emmeans(tr.av,pairwise ~ trat,adjust="tukey") 

tk1 <- cld(leastsquare$emmeans,alpha=0.05,Letters=letters,reversed=T) %>% 
  mutate(.group = stri_replace_all_fixed(.$.group, " ", ""))

tk2 <- tr %>% group_by(trat) %>% get_summary_stats(resp, type = "mean_sd")
tk3 <-merge(tk1,tk2, by=c("trat")) 


resp <- tk3 %>% dplyr::select(trat,variable,mean,.group,sd) %>% 
  pivot_wider(names_from = c(variable),values_from = mean) %>% 
  mutate(resp_tukey=(paste(resp,.group, sep = "_"))) %>% 
  dplyr::rename(sd_resp=sd) %>%   
  dplyr::select(-.group) %>% 
  separate(resp_tukey, sep = "_", c("resp","Group_resp")) %>% 
  mutate(resp=as.numeric(resp))



resp %>% ggplot(aes(trat,resp,label=Group_resp, fill=trat)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = resp-sd_resp, ymax = resp + sd_resp),
                                                        position = "dodge",
                                                        width = 0.25) +
  geom_text(aes(label=Group_resp), hjust=-3, vjust=-0.5, size=6) + 
  theme_bw() + custom_colors_fill

check_model(tr.av, check = c("qq", "normality", "ncv", "homogeneity"))
## Loading required namespace: qqplotr
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

####################################
####################################
#===================================
# BoxCox - MASS package
#===================================
####################################
####################################

box.tr <- boxcox(tr.av, lambda =  seq(-1,1, 1/10))

(lambda <- box.tr$x[which(box.tr$y == max(box.tr$y))])
## [1] 0.1919192
tr$resp_new2 <- (tr$resp^(lambda))

ggscatter(tr,x="resp",y="resp_new2", col="trat") +theme_bw() + custom_colors_color

av.c <- aov(resp_new2 ~ trat, data = tr)

anova(av.c)
## Analysis of Variance Table
## 
## Response: resp_new2
##           Df  Sum Sq Mean Sq F value   Pr(>F)    
## trat       4 17.9965  4.4991  118.47 7.23e-16 ***
## Residuals 25  0.9494  0.0380                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cv(av.c)
##   cv 
## 5.89
av.c %>% report
## For one-way between subjects designs, partial eta squared is equvilant to eta squared.
## Returning eta squared.
## The ANOVA (formula: resp_new2 ~ trat) suggests that:
## 
##   - The main effect of trat is significant and large (F(4, 25) = 118.47, p < .001; Eta2 = 0.95, 90% CI [0.91, 0.97])
## 
## Effect sizes were labelled following Field's (2013) recommendations.
TukeyC(av.c) 
## Results
##   Means G1 G2 G3 G4
## A  4.47  a         
## B  3.80     b      
## C  3.30        c   
## D  2.63           d
## E  2.34           d
## 
## Sig.level
##  0.05
## 
## Diff_Prob
##   A     B     C     D     E
## A 0 0.666 1.171 1.841 2.132
## B 0 0.000 0.504 1.175 1.466
## C 0 0.001 0.000 0.671 0.962
## D 0 0.000 0.000 0.000 0.291
## E 0 0.000 0.000 0.103 0.000
## 
## MSD
##      A    B    C    D    E
## A 0.00 0.33 0.33 0.33 0.33
## B 0.33 0.00 0.33 0.33 0.33
## C 0.33 0.33 0.00 0.33 0.33
## D 0.33 0.33 0.33 0.00 0.33
## E 0.33 0.33 0.33 0.33 0.00
shapiro.test(av.c$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  av.c$residuals
## W = 0.97207, p-value = 0.5972
hist(av.c$residuals)

bartlett.test(av.c$residuals ~ tr$tra)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  av.c$residuals by tr$tra
## Bartlett's K-squared = 2.7846, df = 4, p-value = 0.5945
fligner.test(av.c$residuals ~ tr$tra)
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  av.c$residuals by tr$tra
## Fligner-Killeen:med chi-squared = 2.5493, df = 4, p-value = 0.6358
leastsquare = emmeans(av.c,pairwise ~ trat,adjust="tukey") 

tk1 <- cld(leastsquare$emmeans,alpha=0.05,Letters=letters,reversed=T)%>% 
  mutate(.group = stri_replace_all_fixed(.$.group, " ", ""))


tk2 <- tr %>% group_by(trat) %>% get_summary_stats(resp, type = "mean_sd")

tk3 <-merge(tk1,tk2, by=c("trat")) 

resp <- tk3 %>% dplyr::select(trat,variable,mean,.group,sd) %>% 
  pivot_wider(names_from = c(variable),values_from = mean) %>% 
  mutate(resp_tukey=(paste(resp,.group, sep = "_"))) %>% dplyr::rename(sd_resp=sd) %>%   dplyr::select(-.group)  %>% separate(resp_tukey, sep = "_", c("resp","Group_resp")) %>% mutate(resp=as.numeric(resp))



resp %>% ggplot(aes(trat,resp,label=Group_resp, fill=trat)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = resp-sd_resp, ymax = resp + sd_resp),
                position = "dodge",
                width = 0.25) +
  geom_text(aes(label=Group_resp), hjust=-3, vjust=-0.5, size=6) + 
  theme_bw() + custom_colors_fill

check_model(av.c, check = c("qq", "normality", "ncv", "homogeneity"))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

####################################
####################################
#===================================
# BoxCox - caret package
#===================================
####################################
####################################

BCMod <- BoxCoxTrans(tr$resp, na.rm = TRUE)

BCMod
## Box-Cox Transformation
## 
## 30 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    44.0   136.5   523.5   865.4  1217.8  3020.0 
## 
## Largest/Smallest: 68.6 
## Sample Skewness: 1.07 
## 
## Estimated Lambda: 0 
## With fudge factor, Lambda = 0 will be used for transformations
tr <- cbind(tr, resp_new=predict(BCMod, tr$resp)) 

tr
##    trat resp resp_new2 resp_new
## 1     A 2370  4.443027 7.770645
## 2     A 1687  4.162414 7.430707
## 3     A 2592  4.520038 7.860185
## 4     A 2283  4.411251 7.733246
## 5     A 2910  4.621549 7.975908
## 6     A 3020  4.654576 8.013012
## 7     B 1282  3.948784 7.156177
## 8     B 1527  4.083568 7.331060
## 9     B  871  3.666451 6.769642
## 10    B 1025  3.782820 6.932448
## 11    B  825  3.628470 6.715383
## 12    B  920  3.705167 6.824374
## 13    C  562  3.370755 6.331502
## 14    C  321  3.027237 5.771441
## 15    C  636  3.451733 6.455199
## 16    C  317  3.019961 5.758902
## 17    C  485  3.276766 6.184149
## 18    C  842  3.642701 6.735780
## 19    D  173  2.688586 5.153292
## 20    D  127  2.533730 4.844187
## 21    D  132  2.552577 4.882802
## 22    D  150  2.615976 5.010635
## 23    D  129  2.541339 4.859812
## 24    D  227  2.832478 5.424950
## 25    E  193  2.745632 5.262690
## 26    E   71  2.266167 4.262680
## 27    E   82  2.329687 4.406719
## 28    E   62  2.207976 4.127134
## 29    E   96  2.401242 4.564348
## 30    E   44  2.067331 3.784190
ggscatter(tr,x="resp",y="resp_new", col="trat") +theme_bw() + custom_colors_color

tr.av1 <- aov(resp_new ~ trat, data = tr)


anova(tr.av1)
## Analysis of Variance Table
## 
## Response: resp_new
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## trat       4 45.914 11.4786  103.93 3.381e-15 ***
## Residuals 25  2.761  0.1104                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cv(tr.av1)
##   cv 
## 5.47
tr.av1 %>% report
## For one-way between subjects designs, partial eta squared is equvilant to eta squared.
## Returning eta squared.
## The ANOVA (formula: resp_new ~ trat) suggests that:
## 
##   - The main effect of trat is significant and large (F(4, 25) = 103.93, p < .001; Eta2 = 0.94, 90% CI [0.90, 0.96])
## 
## Effect sizes were labelled following Field's (2013) recommendations.
TukeyC(tr.av1) %>% summary()
## Goups of means at sig.level = 0.05 
##   Means G1 G2 G3 G4 G5
## A  7.80  a            
## B  6.95     b         
## C  6.21        c      
## D  5.03           d   
## E  4.40              e
## 
## Matrix of the difference of means above diagonal and
## respective p-values of the Tukey test below diagonal values
##       A     B     C     D     E
## A 0.000 0.842 1.591 2.768 3.396
## B 0.002 0.000 0.749 1.926 2.554
## C 0.000 0.005 0.000 1.177 1.805
## D 0.000 0.000 0.000 0.000 0.628
## E 0.000 0.000 0.000 0.024 0.000
shapiro.test(tr.av1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  tr.av1$residuals
## W = 0.9764, p-value = 0.724
hist(tr.av1$residuals)

bartlett.test(tr.av1$residuals ~ tr$tra)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  tr.av1$residuals by tr$tra
## Bartlett's K-squared = 5.5731, df = 4, p-value = 0.2334
fligner.test(tr.av1$residuals ~ tr$tra)
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  tr.av1$residuals by tr$tra
## Fligner-Killeen:med chi-squared = 4.8557, df = 4, p-value = 0.3024
leastsquare = emmeans(tr.av1,pairwise ~ trat,adjust="tukey") 

tk1 <- cld(leastsquare$emmeans,alpha=0.05,Letters=letters,reversed=T)%>% 
       mutate(.group = stri_replace_all_fixed(.$.group, " ", ""))


tk2 <- tr %>% group_by(trat) %>% get_summary_stats(resp, type = "mean_sd")

tk3 <-merge(tk1,tk2, by=c("trat")) 

resp <- tk3 %>% dplyr::select(trat,variable,mean,.group,sd) %>% 
  pivot_wider(names_from = c(variable),values_from = mean) %>% 
  mutate(resp_tukey=(paste(resp,.group, sep = "_"))) %>% dplyr::rename(sd_resp=sd) %>%   dplyr::select(-.group)  %>% separate(resp_tukey, sep = "_", c("resp","Group_resp")) %>% mutate(resp=as.numeric(resp))



resp %>% ggplot(aes(trat,resp,label=Group_resp, fill=trat)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = resp-sd_resp, ymax = resp + sd_resp),
                position = "dodge",
                width = 0.25) +
  geom_text(aes(label=Group_resp), hjust=-3, vjust=-0.5, size=6) + 
  theme_bw() + custom_colors_fill

check_model(tr.av1, check = c("qq", "normality", "ncv", "homogeneity"))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

####################################
####################################
#===================================
# Normalize - QuantPsyc package
#===================================
####################################
####################################



tr$resp_norm <- QuantPsyc::Normalize(tr$resp)



tr.av2 <- aov(resp_norm ~ trat, data = tr)


anova(tr.av2)
## Analysis of Variance Table
## 
## Response: resp_norm
##           Df   Sum Sq Mean Sq F value   Pr(>F)    
## trat       4 18825118 4706280  54.986 4.99e-12 ***
## Residuals 25  2139760   85590                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cv(tr.av2)
##    cv 
## 33.81
tr.av2 %>% report
## For one-way between subjects designs, partial eta squared is equvilant to eta squared.
## Returning eta squared.
## The ANOVA (formula: resp_norm ~ trat) suggests that:
## 
##   - The main effect of trat is significant and large (F(4, 25) = 54.99, p < .001; Eta2 = 0.90, 90% CI [0.82, 0.93])
## 
## Effect sizes were labelled following Field's (2013) recommendations.
TukeyC(tr.av2) %>% summary()
## Warning in qt(sig.level, aux_mt$coef[, 3]): NaNs produced

## Warning in qt(sig.level, aux_mt$coef[, 3]): NaNs produced

## Warning in qt(sig.level, aux_mt$coef[, 3]): NaNs produced

## Warning in qt(sig.level, aux_mt$coef[, 3]): NaNs produced
## Goups of means at sig.level = 0.05 
##     Means G1 G2 G3 G4
## A 2047.40  a         
## B 1327.39     b      
## C  878.24     b      
## D  314.37        c   
## E -240.57           d
## 
## Matrix of the difference of means above diagonal and
## respective p-values of the Tukey test below diagonal values
##       A       B        C        D        E
## A 0.000 720.013 1169.157 1733.027 2287.969
## B 0.002   0.000  449.145 1013.014 1567.957
## C 0.000   0.090    0.000  563.870 1118.812
## D 0.000   0.000    0.020    0.000  554.942
## E 0.000   0.000    0.000    0.023    0.000
shapiro.test(tr.av2$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  tr.av2$residuals
## W = 0.9704, p-value = 0.5503
hist(tr.av2$residuals)

bartlett.test(tr.av2$residuals ~ tr$tra)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  tr.av2$residuals by tr$tra
## Bartlett's K-squared = 8.0065, df = 4, p-value = 0.09134
fligner.test(tr.av2$residuals ~ tr$tra)
## 
##  Fligner-Killeen test of homogeneity of variances
## 
## data:  tr.av2$residuals by tr$tra
## Fligner-Killeen:med chi-squared = 5.2105, df = 4, p-value = 0.2664
leastsquare = emmeans(tr.av2,pairwise ~ trat,adjust="tukey") 

tk1 <- cld(leastsquare$emmeans,alpha=0.05,Letters=letters,reversed=T)%>% 
  mutate(.group = stri_replace_all_fixed(.$.group, " ", ""))


tk2 <- tr %>% group_by(trat) %>% get_summary_stats(resp, type = "mean_sd")

tk3 <-merge(tk1,tk2, by=c("trat")) 

resp <- tk3 %>% dplyr::select(trat,variable,mean,.group,sd) %>% 
  pivot_wider(names_from = c(variable),values_from = mean) %>% 
  mutate(resp_tukey=(paste(resp,.group, sep = "_"))) %>% dplyr::rename(sd_resp=sd) %>%   dplyr::select(-.group)  %>% separate(resp_tukey, sep = "_", c("resp","Group_resp")) %>% mutate(resp=as.numeric(resp))



resp %>% ggplot(aes(trat,resp,label=Group_resp, fill=trat)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = resp-sd_resp, ymax = resp + sd_resp),
                position = "dodge",
                width = 0.25) +
  geom_text(aes(label=Group_resp), hjust=-3, vjust=-0.5, size=6) + 
  theme_bw() + custom_colors_fill

check_model(tr.av2, check = c("qq", "normality", "ncv", "homogeneity"))
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'