Abstract: The Q-Q plot is a graphical test of normality that plots the z scores of observed data against the z scores of the empirical CDF.
# Gaussian (normal) sample: density-scaled histogram beside its Q-Q plot.
# N (the sample size) is not defined in this section; it must already exist.
# `mean` is spelled out in full rather than relying on partial matching (`m`).
G <- rnorm(n = N, mean = 5, sd = 2)
G.data <- data.frame(G = G)

# Histogram drawn on the density scale so the kernel density curve can be
# overlaid on the same y axis.
# NOTE: `..density..` is deprecated since ggplot2 3.4.0 in favour of
# after_stat(density); it is kept so older ggplot2 versions still work.
HH <- ggplot(G.data, aes(G)) +
  geom_histogram(
    aes(y = ..density..),
    bins = 100, # 100 bins; this equals sqrt(N) when N is 10000
    col = "blue", fill = "green"
  ) +
  geom_density(col = "red") +
  labs(title = "Gaussian Random Values") +
  theme_minimal(12)

# Q-Q plot of the sample using stat_qq() with its default distribution.
# The reference line takes the sample mean as intercept and the sample
# standard deviation as slope.
QQ <- ggplot(G.data, aes(sample = G)) +
  stat_qq() +
  geom_abline(
    color = "blue",
    intercept = mean(G.data$G),
    slope = sd(G.data$G)
  ) +
  labs(title = "Gaussian (normal) data", subtitle = "Q-Q plot") +
  theme_light()

# Show the histogram and the Q-Q plot side by side.
grid.arrange(HH, QQ, nrow = 1, ncol = 2)
# Pareto sample of size N (location 100, shape 4), wrapped in a data frame
# with a single column P. N must already be defined before this section.
P.data <- data.frame(
  P = rpareto(n = N, location = 100, shape = 4)
)

# Left panel: density-scaled histogram of the draws with a kernel density
# estimate drawn on top of it.
HH <- ggplot(data = P.data, mapping = aes(x = P)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    fill = "green",
    colour = "blue",
    bins = 100
  ) +
  geom_density(colour = "red") +
  theme_minimal(base_size = 12) +
  labs(title = "Pareto Random Values")

# Right panel: Q-Q plot of the same draws, with the reference line added by
# stat_qq_line().
QQ <- ggplot(data = P.data, mapping = aes(sample = P)) +
  stat_qq() +
  stat_qq_line() +
  theme_light() +
  labs(subtitle = "Q-Q plot", title = "Pareto data")

# Place the two panels next to each other in a single row.
grid.arrange(HH, QQ, ncol = 2, nrow = 1)
# Samples from three further distributions, 10000 draws each.

# Lognormal.
# NOTE(review): the original label reads "lognormal (µ = 5, σ = 1)", but the
# call uses meanlog = 2.5 and sdlog = 0.5 — confirm which pair is intended.
# The argument names are spelled out instead of the partial matches `m`/`sd`.
L <- rlnorm(n = 10000, meanlog = 2.5, sdlog = 0.5)

# Uniform on the range (0, 10).
U <- runif(n = 10000, min = 0, max = 10)

# Exponential with mean 5. rexp() is parameterised by rate = 1 / mean, so a
# mean of 5 requires rate = 1 / 5 (rate = 5 would give a mean of 0.2).
X <- rexp(n = 10000, rate = 1 / 5)