norm1 <- rnorm(10000,0.5,2)
norm2 <- rnorm(10000,0.5,2.3)
right <- rbeta(10000,1.5,5)
left <- rbeta(10000,5,1)
par(mfrow = c(1,4))
hist(norm1)
hist(norm2)
hist(right)
hist(left)
dev.off()
## null device
## 1
options(scipen = 999)
min(right)
## [1] 0.0004774916
max(right)
## [1] 0.944581
mean(right)
## [1] 0.2302432
mean(right, trim = 0.1)
## [1] 0.2155755
median(right)
## [1] 0.2023117
mean(right, trim = 0.5)
## [1] 0.2023117
mean(norm1)
## [1] 0.4917642
mean(norm1, trim = 0.1)
## [1] 0.4907483
library(psych) # check out the 'trimmed' statistic:
describe(right)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 10000 0.23 0.15 0.2 0.22 0.16 0 0.94 0.94 0.82 0.26 0
df <- as.data.frame(cbind(norm1, norm2, right, left))
library(tidyverse)
library(reshape2)
long <- melt(df)
str(long)
## 'data.frame': 40000 obs. of 2 variables:
## $ variable: Factor w/ 4 levels "norm1","norm2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ value : num 0.2285 2.1274 -1.9555 -0.0858 3.5571 ...
names(long) <- c("group", "value")
summary(long)
## group value
## norm1:10000 Min. :-9.30172
## norm2:10000 1st Qu.: 0.07465
## right:10000 Median : 0.52423
## left :10000 Mean : 0.51604
## 3rd Qu.: 0.96431
## Max. : 8.65352
Look at the trimmed mean and compare it to the mean:
describeBy(long, long$group)
##
## Descriptive statistics by group
## group: norm1
## vars n mean sd median trimmed mad min max range skew kurtosis
## group* 1 10000 1.00 0 1.00 1.00 0.00 1.00 1.00 0.00 NaN NaN
## value 2 10000 0.49 2 0.48 0.49 2.01 -7.87 7.64 15.51 -0.01 -0.03
## se
## group* 0.00
## value 0.02
## ------------------------------------------------------------
## group: norm2
## vars n mean sd median trimmed mad min max range skew kurtosis
## group* 1 10000 2.00 0.00 2.00 2.00 0.00 2.0 2.00 0.00 NaN NaN
## value 2 10000 0.51 2.31 0.52 0.51 2.27 -9.3 8.65 17.96 -0.01 0.04
## se
## group* 0.00
## value 0.02
## ------------------------------------------------------------
## group: right
## vars n mean sd median trimmed mad min max range skew kurtosis se
## group* 1 10000 3.00 0.00 3.0 3.00 0.00 3 3.00 0.00 NaN NaN 0
## value 2 10000 0.23 0.15 0.2 0.22 0.16 0 0.94 0.94 0.82 0.26 0
## ------------------------------------------------------------
## group: left
## vars n mean sd median trimmed mad min max range skew kurtosis
## group* 1 10000 4.00 0.00 4.00 4.00 0.00 4.00 4 0.00 NaN NaN
## value 2 10000 0.83 0.14 0.87 0.85 0.13 0.17 1 0.83 -1.15 1.04
## se
## group* 0
## value 0
# yuen(y ~ x, data)
df_t <- long %>% filter(group == "norm1" | group == "left")
df_t$group <- droplevels(df_t$group)
summary(df_t)
## group value
## norm1:10000 Min. :-7.8726
## left :10000 1st Qu.: 0.4215
## Median : 0.8444
## Mean : 0.6626
## 3rd Qu.: 0.9807
## Max. : 7.6409
library(WRS2)
yuen(value ~ group, data = df_t)
## Call:
## yuen(formula = value ~ group, data = df_t)
##
## Test statistic: 17.1619 (df = 6057.23), p-value = 0
##
## Trimmed mean difference: -0.37038
## 95 percent confidence interval:
## -0.4127 -0.3281
##
## Explanatory measure of effect size: 0.43
Test statistic: 16.2028 (df = 6057.12), p-value = 0 Explanatory measure of effect size: 0.41 (medium to large)
library(ggplot2)
ggplot(df_t, aes(group, value)) +
geom_boxplot() +
geom_jitter(alpha = 0.01)
Compare to Welch’s t test:
t.test(value ~ group, df_t)
##
## Welch Two Sample t-test
##
## data: value by group
## t = -17.056, df = 10098, p-value < 0.00000000000000022
## alternative hypothesis: true difference in means between group norm1 and group left is not equal to 0
## 95 percent confidence interval:
## -0.3808324 -0.3023177
## sample estimates:
## mean in group norm1 mean in group left
## 0.4917642 0.8333392
t = -15.684, df = 10099, p-value < 0.00000000000000022
library(rstatix)
cohens_d(df_t, value ~ group)
## # A tibble: 1 x 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 value norm1 left -0.241 10000 10000 small
-0.222 10000 10000 small effect
t1way(value ~ group, long)
## Call:
## t1way(formula = value ~ group, data = long)
##
## Test statistic: F = 27270.05
## Degrees of freedom 1: 3
## Degrees of freedom 2: 11978.89
## p-value: 0
##
## Explanatory measure of effect size: 0.4
## Bootstrap CI: [0.39; 0.41]
Test statistic: F = 27901.4 Degrees of freedom 1: 3 Degrees of freedom 2: 11982.42 p-value: 0
Explanatory measure of effect size: 0.4 Bootstrap CI: [0.39; 0.41]
lincon(value ~ group, long)
## Call:
## lincon(formula = value ~ group, data = long)
##
## psihat ci.lower ci.upper p.value
## norm1 vs. norm2 -0.02195 -0.10768 0.06377 0.50066
## norm1 vs. right 0.28129 0.22449 0.33809 0.00000
## norm1 vs. left -0.37038 -0.42714 -0.31362 0.00000
## norm2 vs. right 0.30324 0.23872 0.36777 0.00000
## norm2 vs. left -0.34843 -0.41291 -0.28394 0.00000
## right vs. left -0.65167 -0.65766 -0.64568 0.00000
library(car)
leveneTest(value ~ group, long)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 10100 < 0.00000000000000022 ***
## 39996
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
oneway.test(value ~ group, long, var.equal = F)
##
## One-way analysis of means (not assuming equal variances)
##
## data: value and group
## F = 27845, num df = 3, denom df = 19998, p-value < 0.00000000000000022
F = 28044, num df = 3, denom df = 20013, p-value < 0.00000000000000022
pairwise.t.test(long$value, long$group, p.adjust.method = "BH")
##
## Pairwise comparisons using t tests with pooled SD
##
## data: long$value and long$group
##
## norm1 norm2 right
## norm2 0.43 - -
## right <0.0000000000000002 <0.0000000000000002 -
## left <0.0000000000000002 <0.0000000000000002 <0.0000000000000002
##
## P value adjustment method: BH
norm1 vs. norm2 p = 0.45
library(easystats)
correlation(df, method = "pearson")
## # Correlation Matrix (pearson-method)
##
## Parameter1 | Parameter2 | r | 95% CI | t(9998) | p
## ----------------------------------------------------------------------
## norm1 | norm2 | -8.82e-03 | [-0.03, 0.01] | -0.88 | > .999
## norm1 | right | -0.02 | [-0.04, 0.00] | -1.88 | 0.360
## norm1 | left | 3.50e-03 | [-0.02, 0.02] | 0.35 | > .999
## norm2 | right | -1.93e-03 | [-0.02, 0.02] | -0.19 | > .999
## norm2 | left | -0.01 | [-0.03, 0.00] | -1.48 | 0.696
## right | left | 5.54e-03 | [-0.01, 0.03] | 0.55 | > .999
##
## p-value adjustment method: Holm (1979)
## Observations: 10000
correlation(df, method = "kendall")
## # Correlation Matrix (kendall-method)
##
## Parameter1 | Parameter2 | tau | 95% CI | z | p
## --------------------------------------------------------------------
## norm1 | norm2 | -6.61e-03 | [-0.02, 0.01] | -0.99 | > .999
## norm1 | right | -0.01 | [-0.02, 0.00] | -1.59 | 0.572
## norm1 | left | 1.12e-03 | [-0.01, 0.01] | 0.17 | > .999
## norm2 | right | 6.12e-04 | [-0.01, 0.01] | 0.09 | > .999
## norm2 | left | -2.32e-03 | [-0.02, 0.01] | -0.35 | > .999
## right | left | 0.01 | [ 0.00, 0.02] | 1.67 | 0.572
##
## p-value adjustment method: Holm (1979)
## Observations: 10000
correlation(df, method = "spearman") # all non-significant, norm2 ^ left, p=.27
## # Correlation Matrix (spearman-method)
##
## Parameter1 | Parameter2 | rho | 95% CI | S | p
## -----------------------------------------------------------------------
## norm1 | norm2 | -9.96e-03 | [-0.03, 0.01] | 1.68e+11 | > .999
## norm1 | right | -0.02 | [-0.04, 0.00] | 1.69e+11 | 0.592
## norm1 | left | 1.74e-03 | [-0.02, 0.02] | 1.66e+11 | > .999
## norm2 | right | 8.53e-04 | [-0.02, 0.02] | 1.67e+11 | > .999
## norm2 | left | -3.54e-03 | [-0.02, 0.02] | 1.67e+11 | > .999
## right | left | 0.02 | [ 0.00, 0.04] | 1.64e+11 | 0.592
##
## p-value adjustment method: Holm (1979)
## Observations: 10000
pball(df)
## Call:
## pball(x = df)
##
## Robust correlation matrix:
## norm1 norm2 right left
## norm1 1.0000 -0.0100 -0.0162 -0.0006
## norm2 -0.0100 1.0000 0.0016 -0.0021
## right -0.0162 0.0016 1.0000 0.0141
## left -0.0006 -0.0021 0.0141 1.0000
##
## p-values:
## norm1 norm2 right left
## norm1 NA 0.31809 0.10514 0.95325
## norm2 0.31809 NA 0.86931 0.83104
## right 0.10514 0.86931 NA 0.15804
## left 0.95325 0.83104 0.15804 NA
##
##
## Test statistic H: 5.6916, p-value = 0.45861
norm2 ^ left, p = 0.04700
winall(df)
## Call:
## winall(x = df)
##
## Robust correlation matrix:
## norm1 norm2 right left
## norm1 1.0000 -0.0118 -0.0126 -0.0008
## norm2 -0.0118 1.0000 0.0012 -0.0026
## right -0.0126 0.0012 1.0000 0.0134
## left -0.0008 -0.0026 0.0134 1.0000
##
## p-values:
## norm1 norm2 right left
## norm1 NA 0.23692 0.20855 0.94002
## norm2 0.23692 NA 0.90528 0.79670
## right 0.20855 0.90528 NA 0.18013
## left 0.94002 0.79670 0.18013 NA
norm2 ^ left, p = 0.0344