COLS <- c(rgb(1, 0, 1, .25), # ピンク
rgb(0, 0, 1, .25), # ラベンダー
rgb(0, 1, 0, .25)) # グリーン
GRAY <- rgb(.5, .5, .5, .25)
p0 <- read.csv(file = 'https://stats.dip.jp/01_ds/data/QQplot.csv')
library(DT)
datatable(p0)
set.seed(10)
n <- 100
xn <- rnorm(n)
xt <- rt(n, df = 10)
summary(data.frame(xn, xt))
## xn xt
## Min. :-2.1853 Min. :-2.0669
## 1st Qu.:-0.8291 1st Qu.:-0.5995
## Median :-0.1933 Median : 0.0516
## Mean :-0.1365 Mean : 0.1114
## 3rd Qu.: 0.5934 3rd Qu.: 0.8164
## Max. : 2.2205 Max. : 3.7912
bins <- seq(-3, 4, 1)
(q1 <- quantile(xn, probs = c(1/4, 2/4, 3/4)))
## 25% 50% 75%
## -0.8290776 -0.1933164 0.5933604
(q2 <- quantile(xt, probs = c(1/4, 2/4, 3/4)))
## 25% 50% 75%
## -0.59945238 0.05160441 0.81639760
matplot(x = q1, y = q2, pch = 16,ylim=c(-10,10),
main = '散布図(四分位数同士の比較)',
xlab = '標本1分位数',
ylab = '標本2分位数')
grid()
abline(a = 0, b = 10, col = 'red')

hist(xn, breaks = bins, col = COLS[1],
main = 'ヒストグラム 1',
xlab = '観測値',
ylab = '度数')

hist(xt, breaks = bins, col = COLS[2],
main = 'ヒストグラム 2',
xlab = '観測値',
ylab = '度数')

p <- function(k) (k - 0.5) / n
k <- 1:n
q <- qnorm(p = p(k))
x <- seq(-4, 4, 0.1)
y <- dnorm(x)
matplot(x, y, type = 'l')
abline(v = q, col = COLS[1])

xn.o <- xn[order(xn)]
xt.o <- xt[order(xt)]
matplot(x = q, y = xn.o, type = 'n',
xlim = c(-4, 4),
ylim = c(-4, 4),
main = '正規Q-Qプロット',
xlab = '正規分位数',
ylab = '標本分位数')
matpoints(x = q, y = xn.o, pch = 16, col = COLS[1])
matpoints(x = q, y = xt.o, pch = 16, col = COLS[2])
abline(a = 0, b = 1, col = 'red')
legend('topleft', col = COLS[1:2], pch = 16,
legend = c('標本1(正規乱数)', '標本2(t乱数)'))

library(car)
qqPlot(xn,
main = 'N(0, 1)',
xlab = '正規分位数',
ylab = '標本分位数')

## [1] 88 54
qqPlot(xt,
main = 't(d.f.=10)',
xlab = '正規分位数',
ylab = '標本分位数')

## [1] 52 49
shapiro.test(xn)
##
## Shapiro-Wilk normality test
##
## data: xn
## W = 0.98908, p-value = 0.5911
shapiro.test(xt)
##
## Shapiro-Wilk normality test
##
## data: xt
## W = 0.96269, p-value = 0.006286