# Attach DT up front so datatable() is available for the preview below.
library(DT)

# Read the Q-Q plot exercise data set straight from its URL.
a <- read.csv("https://stats.dip.jp/01_ds/data/QQplot.csv")

# Show the raw data frame as a DT table.
datatable(a)

# Sample vector analysed below: column x of the data frame.
b <- a$x

# Translucent palette (alpha = 0.25): magenta, blue, green.
# rgb() is vectorised, so one call builds all three colours; each position
# across the red/green/blue vectors describes one colour.
COLS <- rgb(
  red   = c(1, 0, 0),
  green = c(0, 0, 1),
  blue  = c(1, 1, 0),
  alpha = 0.25
)

# Translucent mid-gray with the same alpha as the palette.
GRAY <- rgb(red = 0.5, green = 0.5, blue = 0.5, alpha = 0.25)

# Histogram of the sample as a first look at its shape.
hist(b)

# Sample size, taken from the data rather than hard-coded, so the theoretical
# quantiles below always have the same length as the sample.
n <- length(b)

# Plotting position (cumulative probability) assigned to the k-th smallest
# observation: (k - 0.5) / n. Reads n from the enclosing environment.
p <- function(k) (k - 0.5) / n
# Ranks 1..n; seq_len() yields an empty vector (not c(1, 0)) when n is 0.
k <- seq_len(n)
# Standard normal quantiles at those plotting positions.
q <- qnorm(p = p(k))

# Standard normal density on [-4, 4], with a vertical line at each theoretical
# quantile to show how the quantiles are spaced under the curve.
x <- seq(-4, 4, 0.1)
y <- dnorm(x)
matplot(x, y, type = 'l')
abline(v = q, col = COLS[1])

# Order statistics of the sample: values in ascending order, NAs (if any) last.
xn.o <- sort(b, na.last = TRUE)

# Hand-built normal Q-Q plot. First set up an empty canvas (type = "n") whose
# axes span the theoretical and the sample quantiles. The title and axis
# labels are Japanese for "Normal Q-Q plot", "Normal quantiles" and
# "Sample quantiles".
matplot(
  x = q, y = xn.o, type = "n",
  xlim = range(q),
  ylim = range(xn.o),
  main = "正規Q-Qプロット",
  xlab = "正規分位数",
  ylab = "標本分位数"
)

# Sample quantiles against standard normal quantiles.
points(x = q, y = xn.o, col = "blue", pch = 16)
# Reference line y = x (intercept 0, slope 1); here `b` is abline()'s slope
# argument, not the data vector b.
abline(a = 0, b = 1, col = "red")
# Legend entry for the points (label is Japanese for "normal random numbers").
legend("topleft", legend = "正規乱数", pch = 16, col = "blue")

library(car)
# The same normal Q-Q diagnostic drawn with car's qqPlot(). Axis labels are
# Japanese for "Normal quantiles" and "Sample quantiles".
qqPlot(
  b,
  main = "N(0, 1)",
  xlab = "正規分位数",
  ylab = "標本分位数",
  col = "black"
)

# Recorded output of the call above: [1] 75 78
# NOTE(review): presumably the indices of the two observations qqPlot()
# flags as most extreme - confirm against the car documentation.
# Task: test whether the sample really follows a normal distribution.
# Shapiro-Wilk test; the null hypothesis is that b is normally distributed.
shapiro.test(x = b)
# Recorded output:
#
#   Shapiro-Wilk normality test
#
#   data:  b
#   W = 0.98069, p-value = 0.1503
#
# Conclusion: the p-value is not below 0.05, so the null hypothesis is not
# rejected. The sample is therefore treated as consistent with a normal
# distribution (failing to reject is not proof of normality).