#Load packages
library(truncdist)
## Loading required package: stats4
## Loading required package: evd
library(ggplot2)
library(tidyr)
library(plyr)
set.seed(2)
We very often simulate data based on distributions under ideal conditions (e.g., normally distributed, single true effect size). However, these conditions are not truly reflective of real data we collect during psychological experiments. For example, when we use scale data, participants can only answer by indicating a whole round number (e.g., from 1 to 7). The scale has endpoints (1 and 7) that truncate the distribution. In this notebook I’ll play around with more realistic simulations, examine p-curve distributions, and model ways in which effects on a lower level might (or might not) translate into effects on a higher level.
#set up simulation paremeters
#grades go from 0 to 10, so truncate distributions
ll <- 0
ul <- 10
#set sample size
n <- 40
x <-data.frame(group1 = rtrunc(n, spec="norm", a=ll, b=ul, mean = 3, sd = 1.5),
group2 = rtrunc(n, spec="norm", a=ll, b=ul, mean = 6, sd = 1.5),
group3 = rtrunc(n, spec="norm", a=ll, b=ul, mean = 8, sd = 1.5),
group4 = rtrunc(n, spec="norm", a=ll, b=ul, mean = 4.5, sd = 1.5),
group5 = rtrunc(n, spec="norm", a=ll, b=ul, mean = 5.5, sd = 1.5),
group6 = rtrunc(n, spec="norm", a=ll, b=ul, mean = 7.5, sd = 1.5))
#to long format
df <- gather(x, variable, value)
df$condition <- c(rep(0,3*n), rep(1,3*n))
Note that we specify means and sd’s but these are optimal at the midpoint of the scale it seems, but not at the edges of the distribution:
#Check mean and sd per group
ddply(df,~variable,summarise,mean=mean(value),sd=sd(value))
A t-test can be performed to examine the differences between groups:
t.test(value~condition, data = df)
##
## Welch Two Sample t-test
##
## data: value by condition
## t = -0.51889, df = 229.15, p-value = 0.6043
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.711828 0.415063
## sample estimates:
## mean in group 0 mean in group 1
## 5.615913 5.764296
#count values larger than 5 for each group
aggregate(df$value>5, by=list(Category=df$variable), FUN=sum)
Plotting the entire distribution, or per group:
ggplot(df, aes(x=value)) +
geom_density(fill="red", alpha = 0.25)
ggplot(df, aes(x=value, fill=variable)) +
geom_histogram(alpha=0.25)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(df, aes(x=value)) +
geom_histogram(fill="red", alpha = 0.25)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(df, aes(x=value, fill=variable)) +
geom_density(alpha=0.25)
#LEFTOVERS
#count values larger than 5 for each group
aggregate(df$value>5, by=list(Category=df$variable), FUN=sum)