Load Dataset and Libraries

# dplyr supplies the %>% pipe, group_by() and summarise() used by the grouped
# summaries later in this script.
library(dplyr)
# Read the survey export from the shortened URL; the first row holds the
# column names.
data1 <- read.csv(url("https://bit.ly/37E4vcp"), header = TRUE)
# Keep six columns by position: the three grouping variables and the three
# outcome scores (APO, PA, CB) listed by str() below.
farmers <- data1[, c(2, 3, 4, 15, 26, 37), drop = FALSE]
str(farmers)
## 'data.frame':    72 obs. of  6 variables:
##  $ Educational.Attainment: chr  "College Level/Graduate" "College Level/Graduate" "College Level/Graduate" "College Level/Graduate" ...
##  $ Years.In.Farming      : chr  "1 year to 10 years" "1 year to 10 years" "1 year to 10 years" "21 years and above" ...
##  $ Employment            : chr  "Government Employee" "Government Employee" "Government Employee" "Government Employee" ...
##  $ APO                   : num  4.8 4.9 4.7 3.6 4 5 3.9 3.6 4.2 4.8 ...
##  $ PA                    : num  5 5 4.4 2.1 2.8 4.8 4.2 3 4.2 4.5 ...
##  $ CB                    : num  4.9 5 5 4.6 3.8 5 4.7 4.1 4.7 5 ...

Define the Zimmerman (2017) Algorithm for Selecting between Parametric and Nonparametric Procedures

#' Zimmerman (2017) screen: Student's t-test vs. Mann-Whitney test
#'
#' Runs the pooled-variance two-sample t-test on the raw scores and again on
#' their ranks, and stores the absolute difference between the two t
#' statistics. Printing the result recommends the t-test when that difference
#' is at most `threshold`, and the Mann-Whitney test otherwise.
#'
#' @param dependent Numeric vector of scores.
#' @param group Grouping vector, same length as `dependent`. `t.test()`
#'   requires it to define exactly two groups.
#' @param dataset Kept so existing three-argument calls keep working; it is
#'   not consulted. The tests always use `dependent` and `group` as passed,
#'   so columns of `dataset` that happen to be named "dependent" or "group"
#'   can no longer shadow them.
#' @param threshold Largest difference for which the parametric test is
#'   recommended. Defaults to 0.40.
#' @return A list with elements `t1` (t on raw data), `t2` (t on ranks),
#'   `difference` (their absolute difference) and `threshold`, with class
#'   `c("zimmerman_ttest", "simple")`.
normality <- function(dependent, group, dataset = NULL, threshold = 0.40) {
  stopifnot(
    is.numeric(dependent),
    length(dependent) == length(group),
    is.numeric(threshold),
    length(threshold) == 1L,
    !is.na(threshold)
  )

  # Private frame for the formula interface: guarantees both formulas are
  # evaluated against exactly the vectors supplied by the caller.
  scores <- data.frame(dependent = dependent, group = group)

  # Each test is run once; the difference reuses the stored statistics.
  raw_t <- t.test(dependent ~ group, data = scores, var.equal = TRUE)$statistic
  rank_t <- t.test(rank(dependent) ~ group, data = scores,
                   var.equal = TRUE)$statistic

  # A dedicated leading class keeps the t-test wording attached to this
  # result even if another function later defines methods for "simple".
  structure(
    list(
      t1 = raw_t,
      t2 = rank_t,
      difference = abs(raw_t - rank_t),
      threshold = threshold
    ),
    class = c("zimmerman_ttest", "simple")
  )
}

# Shared renderer for the print/summary methods below. `normal_text` is the
# decision line shown when the parametric test is recommended. Message text
# is kept verbatim (including spelling) so previously recorded output matches.
.report_zimmerman_ttest <- function(x, normal_text) {
  cat("T statistic of Raw Data: ", x$t1, "\n")
  cat("T statistic of Rank Data: ", x$t2, "\n")
  cat("Difference: ", x$difference, "\n\n")

  cutoff <- if (is.null(x$threshold)) 0.40 else x$threshold
  if (x$difference <= cutoff) {
    cat(normal_text, "\n")
  } else {
    cat("Decision: Assume that the variables are not normally distirubted and use Mann-Whitney Test", "\n")
  }
  invisible(x)
}

#' Print the statistics and the recommended test; returns `x` invisibly.
print.zimmerman_ttest <- function(x, ...) {
  .report_zimmerman_ttest(
    x,
    "Decision: Assume that the variables are normally distirubted and use T-test. Check the number of observations per group"
  )
}

#' Same report as `print()`, with the original summary wording; returns
#' `object` invisibly.
summary.zimmerman_ttest <- function(object, ...) {
  .report_zimmerman_ttest(
    object,
    "Decision: Assume that the variables are normally distirubted and use T-test. Check the number of observations per group."
  )
}


#' Zimmerman (2017) screen: one-way ANOVA vs. Kruskal-Wallis test
#'
#' Runs `oneway.test()` on the raw scores and again on their ranks, and stores
#' the absolute difference between the two test statistics. Printing the
#' result recommends ANOVA when that difference is at most `threshold`, and
#' the Kruskal-Wallis test otherwise.
#'
#' NOTE(review): `oneway.test()` is called with its defaults, as before. Its
#' documented default is `var.equal = FALSE` (Welch), while the t-test screen
#' and the follow-up `aov()` calls assume equal variances -- confirm this is
#' intended before changing it, since it alters every reported statistic.
#'
#' @param dependent Numeric vector of scores.
#' @param group Grouping vector, same length as `dependent`.
#' @param dataset Kept so existing three-argument calls keep working; it is
#'   not consulted. The tests always use `dependent` and `group` as passed,
#'   so columns of `dataset` that happen to be named "dependent" or "group"
#'   can no longer shadow them.
#' @param threshold Largest difference for which the parametric test is
#'   recommended. Defaults to 0.40.
#' @return A list with elements `t1` (statistic on raw data), `t2` (statistic
#'   on ranks), `difference` (their absolute difference) and `threshold`,
#'   with class `c("zimmerman_anova", "simple")`.
normality2 <- function(dependent, group, dataset = NULL, threshold = 0.40) {
  stopifnot(
    is.numeric(dependent),
    length(dependent) == length(group),
    is.numeric(threshold),
    length(threshold) == 1L,
    !is.na(threshold)
  )

  # Private frame for the formula interface: guarantees both formulas are
  # evaluated against exactly the vectors supplied by the caller.
  scores <- data.frame(dependent = dependent, group = group)

  # Each test is run once; the difference reuses the stored statistics.
  raw_stat <- oneway.test(dependent ~ group, data = scores)$statistic
  rank_stat <- oneway.test(rank(dependent) ~ group, data = scores)$statistic

  # A dedicated leading class keeps the ANOVA wording attached to this result
  # even if another function later defines methods for "simple".
  structure(
    list(
      t1 = raw_stat,
      t2 = rank_stat,
      difference = abs(raw_stat - rank_stat),
      threshold = threshold
    ),
    class = c("zimmerman_anova", "simple")
  )
}

# Shared renderer for the print/summary methods below. `normal_text` is the
# decision line shown when the parametric test is recommended. Message text
# is kept verbatim so previously recorded output matches; note the labels say
# "T statistic" although the values are the F statistics from oneway.test().
.report_zimmerman_anova <- function(x, normal_text) {
  cat("T statistic of Raw Data: ", x$t1, "\n")
  cat("T statistic of Rank Data: ", x$t2, "\n")
  cat("Difference: ", x$difference, "\n\n")

  cutoff <- if (is.null(x$threshold)) 0.40 else x$threshold
  if (x$difference <= cutoff) {
    cat(normal_text, "\n")
  } else {
    cat("Decision: Assume that the variables are not normally distirubted and use Kruskal-Wallis Test", "\n")
  }
  invisible(x)
}

#' Print the statistics and the recommended test; returns `x` invisibly.
print.zimmerman_anova <- function(x, ...) {
  .report_zimmerman_anova(
    x,
    "Decision: Assume that the variables are normally distirubted and use ANOVA. Check the number of observations per group"
  )
}

#' Same report as `print()`, with the original summary wording; returns
#' `object` invisibly.
summary.zimmerman_anova <- function(object, ...) {
  .report_zimmerman_anova(
    object,
    "Decision: Assume that the variables are normally distirubted and use ANOVA.. Check the number of observations per group."
  )
}

Statement of the Problem 1

# Overall mean and standard deviation of each outcome score across all
# respondents in `farmers`.
# Attainment of Program Objectives (APO): mean
mean(farmers[["APO"]])
## [1] 4.169444
# Attainment of Program Objectives (APO): standard deviation
sd(farmers[["APO"]])
## [1] 0.7016357
# Participation of Agencies (PA): mean
mean(farmers[["PA"]])
## [1] 3.945833
# Participation of Agencies (PA): standard deviation
sd(farmers[["PA"]])
## [1] 0.9347648
# Capacity Building (CB): mean
mean(farmers[["CB"]])
## [1] 4.291667
# Capacity Building (CB): standard deviation
sd(farmers[["CB"]])
## [1] 0.7437533

Statement of the Problem 2

# Educational Attainment (two groups). For each outcome (APO, PA, CB) the
# script reports group-wise descriptive statistics, then runs the Zimmerman
# (2017) screen with normality(), then runs the two-sample test it recommends.
# Lines starting with "##" are the recorded output of the preceding call.
# Descriptive Statistics of APO when grouped according to Educational Attainment
# (Count = group size, Mean = group mean, SD = group standard deviation)
farmers %>%
  group_by(Educational.Attainment) %>%
  dplyr::summarise(Count = length(APO),
                   Mean = mean(APO),
                   SD = sd(APO))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 4
##   Educational.Attainment         Count  Mean    SD
##   <chr>                          <int> <dbl> <dbl>
## 1 Basic Education Level/Graduate    24  4.38 0.450
## 2 College Level/Graduate            48  4.06 0.782
# Descriptive Statistics of PA when grouped according to Educational Attainment
farmers %>%
  group_by(Educational.Attainment) %>%
  dplyr::summarise(Count = length(PA),
                   Mean = mean(PA),
                   SD = sd(PA))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 4
##   Educational.Attainment         Count  Mean    SD
##   <chr>                          <int> <dbl> <dbl>
## 1 Basic Education Level/Graduate    24  4.16 0.435
## 2 College Level/Graduate            48  3.84 1.09
# Descriptive Statistics of CB when grouped according to Educational Attainment
farmers %>%
  group_by(Educational.Attainment) %>%
  dplyr::summarise(Count = length(CB),
                   Mean = mean(CB),
                   SD = sd(CB))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 4
##   Educational.Attainment         Count  Mean    SD
##   <chr>                          <int> <dbl> <dbl>
## 1 Basic Education Level/Graduate    24  4.47 0.271
## 2 College Level/Graduate            48  4.20 0.880
# Zimmerman (2017) algorithm for APO: compares the t statistic on the raw
# scores with the t statistic on their ranks; a difference above 0.40 selects
# the nonparametric test.
normality(farmers$APO, farmers$Educational.Attainment, farmers)
## T statistic of Raw Data:  1.822513 
## T statistic of Rank Data:  1.411418 
## Difference:  0.4110955 
## 
## Decision: Assume that the variables are not normally distirubted and use Mann-Whitney Test
# Mann-Whitney U Test, as recommended above. The warning below is emitted
# because tied scores prevent an exact p-value, so the continuity-corrected
# approximation is reported.
wilcox.test(APO~Educational.Attainment, farmers)
## Warning in wilcox.test.default(x = c(3.9, 3.6, 4, 4.1, 4.7, 4.7, 4.1, 4.7, :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  APO by Educational.Attainment
## W = 693, p-value = 0.1628
## alternative hypothesis: true location shift is not equal to 0
# Zimmerman (2017) algorithm for PA (difference above 0.40, so nonparametric)
normality(farmers$PA, farmers$Educational.Attainment, farmers)
## T statistic of Raw Data:  1.400097 
## T statistic of Rank Data:  0.3274115 
## Difference:  1.072686 
## 
## Decision: Assume that the variables are not normally distirubted and use Mann-Whitney Test
# Mann-Whitney U Test, as recommended above
wilcox.test(PA~Educational.Attainment, farmers)
## Warning in wilcox.test.default(x = c(4.2, 3.5, 3.8, 4.6, 4.7, 4.4, 3.9, : cannot
## compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  PA by Educational.Attainment
## W = 603.5, p-value = 0.7463
## alternative hypothesis: true location shift is not equal to 0
# Zimmerman (2017) Algorithm for CB (difference above 0.40, so nonparametric)
normality(farmers$CB, farmers$Educational.Attainment, farmers)
## T statistic of Raw Data:  1.456749 
## T statistic of Rank Data:  0.2802003 
## Difference:  1.176548 
## 
## Decision: Assume that the variables are not normally distirubted and use Mann-Whitney Test
# Mann-Whitney U Test, as recommended above
wilcox.test(CB~Educational.Attainment, farmers)
## Warning in wilcox.test.default(x = c(4.7, 4.8, 4.6, 4.7, 4.7, 4.3, 4.2, : cannot
## compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  CB by Educational.Attainment
## W = 599.5, p-value = 0.7825
## alternative hypothesis: true location shift is not equal to 0
# Years in Farming (three groups). Same workflow as for Educational
# Attainment, but the screen is normality2(), which chooses between one-way
# ANOVA and the Kruskal-Wallis test.
# Descriptive Statistics of APO when grouped according to Years in Farming
# (Count = group size, Mean = group mean, SD = group standard deviation)
farmers %>%
  group_by(Years.In.Farming) %>%
  dplyr::summarise(Count = length(APO),
                   Mean = mean(APO),
                   SD = sd(APO))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
##   Years.In.Farming     Count  Mean    SD
##   <chr>                <int> <dbl> <dbl>
## 1 1 year to 10 years      50  4.19 0.725
## 2 11 years to 20 years    11  4.19 0.670
## 3 21 years and above      11  4.05 0.670
# Descriptive Statistics of PA when grouped according to Years in Farming
farmers %>%
  group_by(Years.In.Farming) %>%
  dplyr::summarise(Count = length(PA),
                   Mean = mean(PA),
                   SD = sd(PA))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
##   Years.In.Farming     Count  Mean    SD
##   <chr>                <int> <dbl> <dbl>
## 1 1 year to 10 years      50  4.02 0.951
## 2 11 years to 20 years    11  4.06 0.880
## 3 21 years and above      11  3.48 0.846
# Descriptive Statistics of CB when grouped according to Years in Farming
farmers %>%
  group_by(Years.In.Farming) %>%
  dplyr::summarise(Count = length(CB),
                   Mean = mean(CB),
                   SD = sd(CB))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
##   Years.In.Farming     Count  Mean    SD
##   <chr>                <int> <dbl> <dbl>
## 1 1 year to 10 years      50  4.28 0.822
## 2 11 years to 20 years    11  4.35 0.513
## 3 21 years and above      11  4.29 0.591
# Zimmerman (2017) Algorithm for APO (difference at most 0.40, so ANOVA)
normality2(farmers$APO, farmers$Years.In.Farming, farmers)
## T statistic of Raw Data:  0.2104098 
## T statistic of Rank Data:  0.3003331 
## Difference:  0.0899233 
## 
## Decision: Assume that the variables are normally distirubted and use ANOVA. Check the number of observations per group
# Boxplot to check homoscedasticity (the plot itself is not reproduced here)
boxplot(APO~Years.In.Farming, farmers)

# Oneway ANOVA, as recommended above
summary(aov(APO~Years.In.Farming, farmers))
##                  Df Sum Sq Mean Sq F value Pr(>F)
## Years.In.Farming  2   0.20  0.0998   0.198  0.821
## Residuals        69  34.75  0.5037
# Zimmerman Algorithm for PA (difference above 0.40, so nonparametric)
normality2(farmers$PA, farmers$Years.In.Farming, farmers)
## T statistic of Raw Data:  1.84054 
## T statistic of Rank Data:  3.505915 
## Difference:  1.665375 
## 
## Decision: Assume that the variables are not normally distirubted and use Kruskal-Wallis Test
# Kruskal Wallis Test, as recommended above
kruskal.test(PA~Years.In.Farming, farmers)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  PA by Years.In.Farming
## Kruskal-Wallis chi-squared = 4.6945, df = 2, p-value = 0.09563
# Zimmerman (2017) Algorithm for CB (difference at most 0.40, so ANOVA)
normality2(farmers$CB, farmers$Years.In.Farming, farmers)
## T statistic of Raw Data:  0.07870961 
## T statistic of Rank Data:  0.1250298 
## Difference:  0.04632019 
## 
## Decision: Assume that the variables are normally distirubted and use ANOVA. Check the number of observations per group
# Boxplot to check homoscedasticity (the plot itself is not reproduced here)
boxplot(CB~Years.In.Farming, farmers)

# Oneway ANOVA, as recommended above
summary(aov(CB~Years.In.Farming, farmers))
##                  Df Sum Sq Mean Sq F value Pr(>F)
## Years.In.Farming  2   0.05  0.0264   0.046  0.955
## Residuals        69  39.22  0.5684
# Employment (three groups). Group-wise descriptive statistics for all three
# outcomes, followed by the normality2() screen and the selected test for APO.
# Descriptive Statistics of APO when grouped according to Employment
# (Count = group size, Mean = group mean, SD = group standard deviation)
farmers %>%
  group_by(Employment) %>%
  dplyr::summarise(Count = length(APO),
                   Mean = mean(APO),
                   SD = sd(APO))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
##   Employment          Count  Mean    SD
##   <chr>               <int> <dbl> <dbl>
## 1 Government Employee    16  4.26 0.732
## 2 Private Employee       24  4    0.845
## 3 Self-employed          32  4.25 0.552
# Descriptive Statistics of PA when grouped according to Employment
farmers %>%
  group_by(Employment) %>%
  dplyr::summarise(Count = length(PA),
                   Mean = mean(PA),
                   SD = sd(PA))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
##   Employment          Count  Mean    SD
##   <chr>               <int> <dbl> <dbl>
## 1 Government Employee    16  3.75 1.28 
## 2 Private Employee       24  3.92 0.978
## 3 Self-employed          32  4.06 0.682
# Descriptive Statistics of CB when grouped according to Employment
farmers %>%
  group_by(Employment) %>%
  dplyr::summarise(Count = length(CB),
                   Mean = mean(CB),
                   SD = sd(CB))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
##   Employment          Count  Mean    SD
##   <chr>               <int> <dbl> <dbl>
## 1 Government Employee    16  4.36 0.866
## 2 Private Employee       24  4.07 0.934
## 3 Self-employed          32  4.43 0.443
# Zimmerman (2017) Algorithm for APO (difference at most 0.40, so ANOVA)
normality2(farmers$APO, farmers$Employment, farmers)
## T statistic of Raw Data:  0.8502508 
## T statistic of Rank Data:  0.8244798 
## Difference:  0.02577097 
## 
## Decision: Assume that the variables are normally distirubted and use ANOVA. Check the number of observations per group
# Boxplot to check homoscedasticity (the plot itself is not reproduced here)
boxplot(APO~Employment, farmers)

# Oneway ANOVA, as recommended above
summary(aov(APO~Employment, farmers))
##             Df Sum Sq Mean Sq F value Pr(>F)
## Employment   2   1.03  0.5169   1.051  0.355
## Residuals   69  33.92  0.4916
# Zimmerman (2017) Algorithm
normality2(farmers$PA, farmers$Employment, farmers)
## T statistic of Raw Data:  0.4866211 
## T statistic of Rank Data:  0.01679434 
## Difference:  0.4698268 
## 
## Decision: Assume that the variables are not normally distirubted and use Kruskal-Wallis Test
# Kruskal Wallis Test
# The difference above (0.4698268) exceeds the 0.40 cutoff, so the screen
# recommends the nonparametric procedure. PA by Employment is therefore
# tested with Kruskal-Wallis, as was done for PA by Years.In.Farming, instead
# of the boxplot + one-way ANOVA that was run here before.
kruskal.test(PA~Employment, farmers)
# NOTE: the previously recorded ANOVA table belonged to the replaced aov()
# call and has been removed; re-run the script to record the Kruskal-Wallis
# output for this comparison.
# Zimmerman (2017) Algorithm for CB by Employment. The difference (0.3992487)
# is just under the 0.40 cutoff, so the screen recommends ANOVA.
normality2(farmers$CB, farmers$Employment, farmers)
## T statistic of Raw Data:  1.508753 
## T statistic of Rank Data:  1.109504 
## Difference:  0.3992487 
## 
## Decision: Assume that the variables are normally distirubted and use ANOVA. Check the number of observations per group
# Boxplot to check homoscedasticity (the plot itself is not reproduced here)
boxplot(CB~Employment, farmers)

# Oneway ANOVA, as recommended above
summary(aov(CB~Employment, farmers))
##             Df Sum Sq Mean Sq F value Pr(>F)
## Employment   2   1.88  0.9388   1.732  0.185
## Residuals   69  37.40  0.5420

Session Info for Reproducibility

# Record the R version, platform, locale, and attached/loaded package versions
# used for this run.
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_Philippines.1252  LC_CTYPE=English_Philippines.1252   
## [3] LC_MONETARY=English_Philippines.1252 LC_NUMERIC=C                        
## [5] LC_TIME=English_Philippines.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] dplyr_1.0.2
## 
## loaded via a namespace (and not attached):
##  [1] knitr_1.30       magrittr_1.5     tidyselect_1.1.0 R6_2.5.0        
##  [5] rlang_0.4.8      fansi_0.4.1      stringr_1.4.0    tools_4.0.3     
##  [9] xfun_0.18        utf8_1.1.4       cli_2.1.0        htmltools_0.5.0 
## [13] ellipsis_0.3.1   yaml_2.2.1       digest_0.6.27    assertthat_0.2.1
## [17] tibble_3.0.4     lifecycle_0.2.0  crayon_1.3.4     purrr_0.3.4     
## [21] vctrs_0.3.4      glue_1.4.2       evaluate_0.14    rmarkdown_2.5   
## [25] stringi_1.5.3    compiler_4.0.3   pillar_1.4.6     generics_0.0.2  
## [29] pkgconfig_2.0.3