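Abstract: The Q-Q (quantile-quantile) plot is a graphical test of normality that plots the observed data (the sample quantiles) against the normal z scores corresponding to their empirical CDF values (the theoretical quantiles). If the data are normally distributed, the points fall close to a straight line.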

Gaussian Data

Generate some normal data and save it in a data frame:

# Draw N values from a normal distribution with mean 5 and standard
# deviation 2. N is not assigned in this section and must already exist.
# The argument is spelled out as `mean` instead of relying on partial
# matching of `m`.
G <- rnorm(n = N, mean = 5, sd = 2)

# Wrap the sample in a one-column data frame (column `G`) for ggplot.
G.data <- data.frame(G = G)

Use ggplot to generate a histogram and a Q-Q plot:

# Histogram of the Gaussian sample, scaled to density, with a kernel
# density curve drawn over it in red.
HH <- ggplot(data = G.data, mapping = aes(x = G)) +
  geom_histogram(
    mapping = aes(y = ..density..),  # y axis is density rather than count
    bins = 100,                      # 100 bins, where 100 = sqrt(N)
    colour = "blue",
    fill = "green"
  ) +
  geom_density(colour = "red") +
  labs(title = "Gaussian Random Values") +
  theme_minimal(base_size = 12)

# Q-Q plot of the Gaussian sample. The blue reference line uses the sample
# mean as its intercept and the sample standard deviation as its slope.
QQ <- ggplot(data = G.data, mapping = aes(sample = G)) +
  stat_qq() +
  geom_abline(
    intercept = mean(G.data[["G"]]),
    slope = sd(G.data[["G"]]),
    colour = "blue"
  ) +
  labs(title = "Gaussian (normal) data", subtitle = "Q-Q plot") +
  theme_light()

# Show the histogram and the Q-Q plot side by side in a single row.
grid.arrange(HH, QQ, ncol = 2, nrow = 1)

Pareto data

# Draw N Pareto-distributed values (location 100, shape 4) and store them
# in a one-column data frame (column `P`) for ggplot.
P.data <- data.frame(
  P = rpareto(n = N, location = 100, shape = 4)
)

# Histogram of the Pareto sample, scaled to density, with a kernel density
# curve drawn over it in red.
HH <- ggplot(data = P.data, mapping = aes(x = P)) +
  geom_histogram(
    mapping = aes(y = ..density..),  # y axis is density rather than count
    bins = 100,
    colour = "blue",
    fill = "green"
  ) +
  geom_density(colour = "red") +
  labs(title = "Pareto Random Values") +
  theme_minimal(base_size = 12)


# Q-Q plot of the Pareto sample, with the reference line added by
# stat_qq_line() using its default settings.
QQ <- ggplot(data = P.data, mapping = aes(sample = P)) +
  stat_qq() +
  stat_qq_line() +
  labs(title = "Pareto data", subtitle = "Q-Q plot") +
  theme_light()

# Show the histogram and the Q-Q plot side by side in a single row.
grid.arrange(HH, QQ, ncol = 2, nrow = 1)

# Additional samples to try with the same histogram / Q-Q plot recipe.

# lognormal (meanlog = 2.5, sdlog = 0.5)
# NOTE(review): the original label read "µ = 5, σ = 1", which does not
# match the parameters passed to rlnorm(); the code values are kept here.
# Confirm which pair was intended.
L <- rlnorm(n = 10000, meanlog = 2.5, sdlog = 0.5)

# uniform (range = (0, 10))
U <- runif(n = 10000, min = 0, max = 10)

# exponential (mean = 5)
# rexp() is parameterised by rate = 1 / mean, so a mean of 5 needs
# rate = 1 / 5 (the original rate = 5 gives a mean of 0.2).
X <- rexp(n = 10000, rate = 1 / 5)