Simulating Data
- y is a simulated normal distribution with a mean of 0 and an SD of 1.
- yo is a highly skewed distribution of the kind you might see if teachers do not grade to a curve. Its mean is also 0 and its SD is also 1.
library(tidyverse)
library(fGarch)
library(psych)
library(gridExtra)
# Fix the RNG seed so the simulated samples -- and every table, test and plot
# derived from them -- are identical on each run.
set.seed(42)

# y: 1000 draws from a standard normal distribution.
y <- rnorm(1000)
# yo: 1000 draws from fGarch's skew-normal generator with skewness
# parameter xi = -6 (all other parameters left at their defaults).
yo <- fGarch::rsnorm(1000, xi = -6)
d <- data.frame(y = y, yo = yo)

# One histogram per variable, drawn side by side. bins = 30 is ggplot2's
# default, stated explicitly so stat_bin() does not emit its
# "pick a better value" message on every render.
hist_top <- ggplot(d, aes(y)) + geom_histogram(bins = 30)
hist_right <- ggplot(d, aes(yo)) + geom_histogram(bins = 30)
gridExtra::grid.arrange(hist_top, hist_right, ncol = 2)

Distribution Qualities
# Mean, SD and skewness of every column of d, reshaped to one row per
# (variable, statistic) pair and rendered as a table.
d %>%
  # .cols is given explicitly: calling across() without it is deprecated.
  summarise(
    across(
      everything(),
      list(mean = mean, sd = sd, skew = skew),
      .names = "{col}_{fn}"
    )
  ) %>%
  # Summary columns are named like "y_mean"; split each name at the
  # underscore into the variable and the statistic it reports.
  pivot_longer(
    cols = everything(),
    names_to = c("Variable", "Statistic"),
    names_sep = "_",
    values_to = "Value"
  ) %>%
  mutate(Value = round(Value, 2)) %>%
  knitr::kable()
| y |
mean |
-0.03 |
| y |
sd |
1.04 |
| y |
skew |
0.02 |
| yo |
mean |
0.03 |
| yo |
sd |
0.98 |
| yo |
skew |
-0.94 |
KS Test
# Two-sample Kolmogorov-Smirnov test comparing the normal sample (y)
# with the skewed sample (yo).
ks.test(x = y, y = yo)
##
## Two-sample Kolmogorov-Smirnov test
##
## data: y and yo
## D = 0.115, p-value = 3.61e-06
## alternative hypothesis: two-sided
# Empirical CDFs of both variables on one set of axes: stack the columns of d
# into name/value pairs, then draw one step curve per original column.
d %>%
  pivot_longer(
    cols = everything(),
    names_to = "name",
    values_to = "value"
  ) %>%
  ggplot(mapping = aes(x = value, group = name, colour = name)) +
  stat_ecdf()

Relative Polarity
Note that the skewed variable has significantly more values in the upper tail.
# Relative distribution of the skewed sample (passed as y, the comparison)
# against the normal sample (passed as yo, the reference). The result is kept
# in r so its polarity components (rp, rpl, rpu) can be read afterwards.
r <- reldist::reldist(
  y = yo,
  yo = y,
  bar = TRUE,
  ci = TRUE,
  smooth = 0.1,
  yolabs = seq(from = -1, to = 3, by = 0.5),
  ylim = c(0, 3),
  cex = 0.8,
  ylab = "Relative Density",
  xlab = "Proportion of the Normal Variable"
)

# Polarity summary table, built column by column. For each of r$rp, r$rpl and
# r$rpu, element 2 is shown as the estimate and elements 1 and 3 as the lower
# and upper 95% bounds.
tibble(
  Polarity = c(
    "Median Polarity",
    "Lower Tail Polarity",
    "Upper Tail Polarity"
  ),
  Estimate = c(r$rp[2], r$rpl[2], r$rpu[2]),
  `CI-95%` = c(r$rp[1], r$rpl[1], r$rpu[1]),
  `CI+95%` = c(r$rp[3], r$rpl[3], r$rpu[3])
) %>%
  knitr::kable(digits = 2)
| Median Polarity |
0.79 |
0.75 |
0.82 |
| Lower Tail Polarity |
-0.33 |
-0.40 |
-0.26 |
| Upper Tail Polarity |
0.81 |
0.72 |
0.91 |