# Load the Zimmerman (2017) algorithm for selecting between parametric and nonparametric procedures ----
# Zimmerman (2017) screening for two independent groups.
#
# Computes the pooled-variance two-sample t statistic on the raw scores and on
# their ranks, together with the absolute difference between the two. Printing
# (or summarising) the result reports the three numbers and a decision: a
# difference of at most 0.40 recommends the t-test, a larger one the
# Mann-Whitney test.
#
# Args:
#   dependent: numeric vector of scores.
#   group:     grouping vector of the same length as `dependent`; t.test()
#              needs exactly two groups.
#   dataset:   accepted for backward compatibility and never evaluated. The
#              formula variables are the `dependent` and `group` arguments
#              themselves; passing `dataset` as `data` would let a column that
#              happens to be called "group" or "dependent" shadow them.
#
# Returns:
#   A list with elements `t1` (raw-score t), `t2` (rank t) and `difference`,
#   of class c("zimmerman_ttest", "simple").
normality <- function(dependent, group, dataset) {
  stopifnot(is.numeric(dependent), length(dependent) == length(group))

  # Rank only the observations that actually enter the test.
  keep <- !is.na(dependent) & !is.na(group)
  dependent <- dependent[keep]
  group <- group[keep]
  ranked <- rank(dependent)

  # Each test is run once; the difference reuses the stored statistics.
  t_raw <- stats::t.test(dependent ~ group, var.equal = TRUE)$statistic
  t_rank <- stats::t.test(ranked ~ group, var.equal = TRUE)$statistic

  output <- list(t1 = t_raw, t2 = t_rank, difference = abs(t_raw - t_rank))
  # A dedicated subclass keeps the report tied to this result: a print.simple
  # defined elsewhere cannot take over, while inherits(x, "simple") still holds.
  class(output) <- c("zimmerman_ttest", "simple")
  output
}

# Writes the statistics and the decision for a "zimmerman_ttest" result.
# `parametric_decision` is the line shown when the difference is within 0.40.
.report_zimmerman_ttest <- function(x, parametric_decision) {
  cat("T statistic of Raw Data: ", x$t1, "\n")
  cat("T statistic of Rank Data: ", x$t2, "\n")
  cat("Difference: ", x$difference, "\n\n")
  if (x$difference <= 0.40) {
    cat(parametric_decision, "\n")
  } else {
    cat("Decision: Assume that the variables are not normally distirubted and use Mann-Whitney Test", "\n")
  }
  invisible(x)
}

# print() method: report for the two-group screening; returns `x` invisibly.
print.zimmerman_ttest <- function(x, ...) {
  .report_zimmerman_ttest(
    x,
    "Decision: Assume that the variables are normally distirubted and use T-test. Check the number of observations per group"
  )
}

# summary() method: same report as print(); returns `object` invisibly.
summary.zimmerman_ttest <- function(object, ...) {
  .report_zimmerman_ttest(
    object,
    "Decision: Assume that the variables are normally distirubted and use T-test. Check the number of observations per group."
  )
}
# Zimmerman (2017) screening extended to more than two independent groups.
#
# Computes the oneway.test() statistic on the raw scores and on their ranks,
# together with the absolute difference between the two. Printing (or
# summarising) the result reports the three numbers and a decision: a
# difference of at most 0.40 recommends ANOVA, a larger one the Kruskal-Wallis
# test.
#
# Args:
#   dependent: numeric vector of scores.
#   group:     grouping vector of the same length as `dependent`.
#   dataset:   accepted for backward compatibility and never evaluated. The
#              formula variables are the `dependent` and `group` arguments
#              themselves; passing `dataset` as `data` would let a column that
#              happens to be called "group" or "dependent" shadow them.
#
# Returns:
#   A list with elements `t1` (raw-score statistic), `t2` (rank statistic) and
#   `difference`, of class c("zimmerman_anova", "simple").
normality2 <- function(dependent, group, dataset) {
  stopifnot(is.numeric(dependent), length(dependent) == length(group))

  # Rank only the observations that actually enter the test.
  keep <- !is.na(dependent) & !is.na(group)
  dependent <- dependent[keep]
  group <- group[keep]
  ranked <- rank(dependent)

  # Each test is run once; the difference reuses the stored statistics.
  f_raw <- stats::oneway.test(dependent ~ group)$statistic
  f_rank <- stats::oneway.test(ranked ~ group)$statistic

  output <- list(t1 = f_raw, t2 = f_rank, difference = abs(f_raw - f_rank))
  # A dedicated subclass keeps the report tied to this result: a print.simple
  # defined elsewhere cannot take over, while inherits(x, "simple") still holds.
  class(output) <- c("zimmerman_anova", "simple")
  output
}

# Writes the statistics and the decision for a "zimmerman_anova" result.
# `parametric_decision` is the line shown when the difference is within 0.40.
# NOTE(review): the labels say "T statistic" although oneway.test() reports an
# F statistic; the wording is kept as-is so existing output does not change.
.report_zimmerman_anova <- function(x, parametric_decision) {
  cat("T statistic of Raw Data: ", x$t1, "\n")
  cat("T statistic of Rank Data: ", x$t2, "\n")
  cat("Difference: ", x$difference, "\n\n")
  if (x$difference <= 0.40) {
    cat(parametric_decision, "\n")
  } else {
    cat("Decision: Assume that the variables are not normally distirubted and use Kruskal-Wallis Test", "\n")
  }
  invisible(x)
}

# print() method: report for the multi-group screening; returns `x` invisibly.
print.zimmerman_anova <- function(x, ...) {
  .report_zimmerman_anova(
    x,
    "Decision: Assume that the variables are normally distirubted and use ANOVA. Check the number of observations per group"
  )
}

# summary() method: same report as print(); returns `object` invisibly.
summary.zimmerman_anova <- function(object, ...) {
  .report_zimmerman_anova(
    object,
    "Decision: Assume that the variables are normally distirubted and use ANOVA.. Check the number of observations per group."
  )
}
# Statement of the Problem 2 ----
# APO by educational attainment: group size, mean and standard deviation
farmers %>%
  group_by(Educational.Attainment) %>%
  dplyr::summarise(
    Count = dplyr::n(),
    Mean = mean(APO),
    SD = sd(APO)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 4
## Educational.Attainment Count Mean SD
## <chr> <int> <dbl> <dbl>
## 1 Basic Education Level/Graduate 24 4.38 0.450
## 2 College Level/Graduate 48 4.06 0.782
# PA by educational attainment: group size, mean and standard deviation
farmers %>%
  group_by(Educational.Attainment) %>%
  dplyr::summarise(
    Count = dplyr::n(),
    Mean = mean(PA),
    SD = sd(PA)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 4
## Educational.Attainment Count Mean SD
## <chr> <int> <dbl> <dbl>
## 1 Basic Education Level/Graduate 24 4.16 0.435
## 2 College Level/Graduate 48 3.84 1.09
# CB by educational attainment: group size, mean and standard deviation
farmers %>%
  group_by(Educational.Attainment) %>%
  dplyr::summarise(
    Count = dplyr::n(),
    Mean = mean(CB),
    SD = sd(CB)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 4
## Educational.Attainment Count Mean SD
## <chr> <int> <dbl> <dbl>
## 1 Basic Education Level/Graduate 24 4.47 0.271
## 2 College Level/Graduate 48 4.20 0.880
# Zimmerman (2017) screening: APO across educational attainment
normality(farmers[["APO"]], farmers[["Educational.Attainment"]], farmers)
## T statistic of Raw Data: 1.822513
## T statistic of Rank Data: 1.411418
## Difference: 0.4110955
##
## Decision: Assume that the variables are not normally distirubted and use Mann-Whitney Test
# Mann-Whitney U test, as recommended by the screening
wilcox.test(APO ~ Educational.Attainment, data = farmers)
## Warning in wilcox.test.default(x = c(3.9, 3.6, 4, 4.1, 4.7, 4.7, 4.1, 4.7, :
## cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: APO by Educational.Attainment
## W = 693, p-value = 0.1628
## alternative hypothesis: true location shift is not equal to 0
# Zimmerman (2017) screening: PA across educational attainment
normality(farmers[["PA"]], farmers[["Educational.Attainment"]], farmers)
## T statistic of Raw Data: 1.400097
## T statistic of Rank Data: 0.3274115
## Difference: 1.072686
##
## Decision: Assume that the variables are not normally distirubted and use Mann-Whitney Test
# Mann-Whitney U test, as recommended by the screening
wilcox.test(PA ~ Educational.Attainment, data = farmers)
## Warning in wilcox.test.default(x = c(4.2, 3.5, 3.8, 4.6, 4.7, 4.4, 3.9, : cannot
## compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: PA by Educational.Attainment
## W = 603.5, p-value = 0.7463
## alternative hypothesis: true location shift is not equal to 0
# Zimmerman (2017) screening: CB across educational attainment
normality(farmers[["CB"]], farmers[["Educational.Attainment"]], farmers)
## T statistic of Raw Data: 1.456749
## T statistic of Rank Data: 0.2802003
## Difference: 1.176548
##
## Decision: Assume that the variables are not normally distirubted and use Mann-Whitney Test
# Mann-Whitney U test, as recommended by the screening
wilcox.test(CB ~ Educational.Attainment, data = farmers)
## Warning in wilcox.test.default(x = c(4.7, 4.8, 4.6, 4.7, 4.7, 4.3, 4.2, : cannot
## compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: CB by Educational.Attainment
## W = 599.5, p-value = 0.7825
## alternative hypothesis: true location shift is not equal to 0
# APO by years in farming: group size, mean and standard deviation
farmers %>%
  group_by(Years.In.Farming) %>%
  dplyr::summarise(
    Count = dplyr::n(),
    Mean = mean(APO),
    SD = sd(APO)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
## Years.In.Farming Count Mean SD
## <chr> <int> <dbl> <dbl>
## 1 1 year to 10 years 50 4.19 0.725
## 2 11 years to 20 years 11 4.19 0.670
## 3 21 years and above 11 4.05 0.670
# PA by years in farming: group size, mean and standard deviation
farmers %>%
  group_by(Years.In.Farming) %>%
  dplyr::summarise(
    Count = dplyr::n(),
    Mean = mean(PA),
    SD = sd(PA)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
## Years.In.Farming Count Mean SD
## <chr> <int> <dbl> <dbl>
## 1 1 year to 10 years 50 4.02 0.951
## 2 11 years to 20 years 11 4.06 0.880
## 3 21 years and above 11 3.48 0.846
# CB by years in farming: group size, mean and standard deviation
farmers %>%
  group_by(Years.In.Farming) %>%
  dplyr::summarise(
    Count = dplyr::n(),
    Mean = mean(CB),
    SD = sd(CB)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
## Years.In.Farming Count Mean SD
## <chr> <int> <dbl> <dbl>
## 1 1 year to 10 years 50 4.28 0.822
## 2 11 years to 20 years 11 4.35 0.513
## 3 21 years and above 11 4.29 0.591
# Zimmerman (2017) screening: APO across years in farming
normality2(farmers[["APO"]], farmers[["Years.In.Farming"]], farmers)
## T statistic of Raw Data: 0.2104098
## T statistic of Rank Data: 0.3003331
## Difference: 0.0899233
##
## Decision: Assume that the variables are normally distirubted and use ANOVA. Check the number of observations per group
# Visual check of the within-group spread (homoscedasticity)
boxplot(APO ~ Years.In.Farming, data = farmers)

# One-way ANOVA
summary(aov(APO ~ Years.In.Farming, data = farmers))
## Df Sum Sq Mean Sq F value Pr(>F)
## Years.In.Farming 2 0.20 0.0998 0.198 0.821
## Residuals 69 34.75 0.5037
# Zimmerman (2017) screening: PA across years in farming
normality2(farmers[["PA"]], farmers[["Years.In.Farming"]], farmers)
## T statistic of Raw Data: 1.84054
## T statistic of Rank Data: 3.505915
## Difference: 1.665375
##
## Decision: Assume that the variables are not normally distirubted and use Kruskal-Wallis Test
# Kruskal-Wallis test, as recommended by the screening
kruskal.test(PA ~ Years.In.Farming, data = farmers)
##
## Kruskal-Wallis rank sum test
##
## data: PA by Years.In.Farming
## Kruskal-Wallis chi-squared = 4.6945, df = 2, p-value = 0.09563
# Zimmerman (2017) screening: CB across years in farming
normality2(farmers[["CB"]], farmers[["Years.In.Farming"]], farmers)
## T statistic of Raw Data: 0.07870961
## T statistic of Rank Data: 0.1250298
## Difference: 0.04632019
##
## Decision: Assume that the variables are normally distirubted and use ANOVA. Check the number of observations per group
# Visual check of the within-group spread (homoscedasticity)
boxplot(CB ~ Years.In.Farming, data = farmers)

# One-way ANOVA
summary(aov(CB ~ Years.In.Farming, data = farmers))
## Df Sum Sq Mean Sq F value Pr(>F)
## Years.In.Farming 2 0.05 0.0264 0.046 0.955
## Residuals 69 39.22 0.5684
# APO by employment: group size, mean and standard deviation
farmers %>%
  group_by(Employment) %>%
  dplyr::summarise(
    Count = dplyr::n(),
    Mean = mean(APO),
    SD = sd(APO)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
## Employment Count Mean SD
## <chr> <int> <dbl> <dbl>
## 1 Government Employee 16 4.26 0.732
## 2 Private Employee 24 4 0.845
## 3 Self-employed 32 4.25 0.552
# PA by employment: group size, mean and standard deviation
farmers %>%
  group_by(Employment) %>%
  dplyr::summarise(
    Count = dplyr::n(),
    Mean = mean(PA),
    SD = sd(PA)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
## Employment Count Mean SD
## <chr> <int> <dbl> <dbl>
## 1 Government Employee 16 3.75 1.28
## 2 Private Employee 24 3.92 0.978
## 3 Self-employed 32 4.06 0.682
# CB by employment: group size, mean and standard deviation
farmers %>%
  group_by(Employment) %>%
  dplyr::summarise(
    Count = dplyr::n(),
    Mean = mean(CB),
    SD = sd(CB)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 4
## Employment Count Mean SD
## <chr> <int> <dbl> <dbl>
## 1 Government Employee 16 4.36 0.866
## 2 Private Employee 24 4.07 0.934
## 3 Self-employed 32 4.43 0.443
# Zimmerman (2017) screening: APO across employment
normality2(farmers[["APO"]], farmers[["Employment"]], farmers)
## T statistic of Raw Data: 0.8502508
## T statistic of Rank Data: 0.8244798
## Difference: 0.02577097
##
## Decision: Assume that the variables are normally distirubted and use ANOVA. Check the number of observations per group
# Visual check of the within-group spread (homoscedasticity)
boxplot(APO ~ Employment, data = farmers)

# One-way ANOVA
summary(aov(APO ~ Employment, data = farmers))
## Df Sum Sq Mean Sq F value Pr(>F)
## Employment 2 1.03 0.5169 1.051 0.355
## Residuals 69 33.92 0.4916
# Zimmerman (2017) screening: PA across employment
normality2(farmers$PA, farmers$Employment, farmers)
## T statistic of Raw Data: 0.4866211
## T statistic of Rank Data: 0.01679434
## Difference: 0.4698268
##
## Decision: Assume that the variables are not normally distirubted and use Kruskal-Wallis Test
# Kruskal-Wallis test: the screening difference (0.47) exceeds the 0.40
# cut-off, so the rank-based test is used here instead of a one-way ANOVA.
kruskal.test(PA ~ Employment, data = farmers)
## NOTE: output not recorded yet; re-run this step to capture the
## Kruskal-Wallis result for PA by Employment.
# Zimmerman (2017) screening: CB across employment
normality2(farmers[["CB"]], farmers[["Employment"]], farmers)
## T statistic of Raw Data: 1.508753
## T statistic of Rank Data: 1.109504
## Difference: 0.3992487
##
## Decision: Assume that the variables are normally distirubted and use ANOVA. Check the number of observations per group
# Visual check of the within-group spread (homoscedasticity)
boxplot(CB ~ Employment, data = farmers)

# One-way ANOVA
summary(aov(CB ~ Employment, data = farmers))
## Df Sum Sq Mean Sq F value Pr(>F)
## Employment 2 1.88 0.9388 1.732 0.185
## Residuals 69 37.40 0.5420