1 章
2 章
3 章
4 章
5 章
6 章
7 章
8 章
9 章
10 章
11 章
12 章
13 章
14 章

1 章

# 1-1
print(paste(556*(9/16),556*(3/16),556*(3/16),556*(1/16)))

## [1] "312.75 104.25 104.25 34.75"

# 1-2
p<-c(9,3,3,1)/16
N<-c(315,101,108,32)
# A.P値は0.9254であり、H_0は棄却されない
chisq.test(N,p=p)

## 
##  Chi-squared test for given probabilities
## 
## data:  N
## X-squared = 0.47002, df = 3, p-value = 0.9254

# 1-3
p<-c(38,31,22,9)/100
N<-c(15,9,2,1)

# A.P値は0.1276であり、H_0は棄却されない
chisq.test(N,p=p)

## Warning in chisq.test(N, p = p): Chi-squared approximation may be incorrect

## 
##  Chi-squared test for given probabilities
## 
## data:  N
## X-squared = 5.6922, df = 3, p-value = 0.1276

# A.P値は2.671e-12であり、H_0は棄却される
N<-c(150,90,20,10)
chisq.test(N,p=p)

## 
##  Chi-squared test for given probabilities
## 
## data:  N
## X-squared = 56.922, df = 3, p-value = 2.671e-12

# 1-4
p<-c(1,1,1,1,1,1,1,1,1,1,1,1)/12
N<-c(318,245,282,270,253,235,280,296,279,338,326,410)
# A.月毎の交通事故死者数には違いがある
chisq.test(N,p=p)

## 
##  Chi-squared test for given probabilities
## 
## data:  N
## X-squared = 87.311, df = 11, p-value = 5.597e-14

# 1-6 書籍より
x1<-rnorm(10000)
x2<-rnorm(10000)
x3<-rnorm(10000)
m<-(x1+x2+x3)/3
A<-(x1-m)**2+(x2-m)**2+(x3-m)**2
B<-x1**2+x2**2+x3**2

hist(A,prob=TRUE)
curve(dchisq(x,df=2),add=TRUE)

hist(B,prob=TRUE)
curve(dchisq(x,df=3),add=TRUE)

\(参考：自由度11のχ二乗分布\)

library(ggplot2)

x <- seq(0, 30, 0.1)  # x軸の範囲を設定
y <- dchisq(x, df = 11)  # 自由度11のχ二乗分布の確率密度関数を計算

data <- data.frame(x = x, y = y)
ggplot(data, aes(x = x, y = y)) +
 geom_line() +
 xlab("x") + ylab("Probability Density") +
 ggtitle("Chi-squared Distribution (df = 11)")

2 章

# 2-1
N <- c(48, 52, 59, 71, 64, 50, 35, 49)
p <- c(1, 1, 1, 1, 1, 1, 1, 1) / 8

# H_0:月の満ち欠けと出産数が独立であるという帰無仮説は棄却される
res <- chisq.test(N, p = p)

# 月相4-5が特異的に多く、7-8が特異的に少ない
round(pnorm(abs(res$stdres), lower.tail = FALSE) * 2, 4)

## [1] 0.4215 0.8265 0.4215 0.0105 0.1249 0.6090 0.0069 0.5107

bp <- barplot(N, names.arg = c("1 to 2", "2 to 3", "3 to 4", "4 to 5", "5 to 6", "6 to 7", "7 to 8", "8 to 1"), xlab = "moon phase", ylab = "number of deliveries")

# text(x=bp,y=N,labels = N,pos=3,xpd=NA)

# 2-2
# (1)
pass.fail <- matrix(c(163, 218, 57, 48), 2, 2)
# (2)
res <- chisq.test(pass.fail)
round(pnorm(abs(res$stdres), lower.tail = FALSE) * 2, 4)

##       [,1]  [,2]
## [1,] 0.036 0.036
## [2,] 0.036 0.036

# →男子の方が合格しやすいと言える。

# 2-3
# H_0:サリドマイドと奇形の有無は関係がない
thalidomide <- matrix(c(90,22,2,186), 2, 2)
chisq.test(thalidomide, correct=FALSE)

## 
##  Pearson's Chi-squared test
## 
## data:  thalidomide
## X-squared = 207.55, df = 1, p-value < 2.2e-16

chisq.test(thalidomide)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  thalidomide
## X-squared = 203.84, df = 1, p-value < 2.2e-16

# →H_0:サリドマイドと奇形の有無は関係がないが棄却される

# 2-4
# H_0:ワクチン接種とインフルエンザの罹患には関係がない
vaccination <- matrix(c(1,15,22,62), 2, 2,
                      dimnames = list(c("接種あり", "接種なし"), c("罹患", "罹患でない")))

print(vaccination)

##          罹患 罹患でない
## 接種あり    1         22
## 接種なし   15         62

chisq.test(vaccination, correct=FALSE)

## Warning in chisq.test(vaccination, correct = FALSE): Chi-squared approximation
## may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  vaccination
## X-squared = 3.0175, df = 1, p-value = 0.08237

chisq.test(vaccination)

## Warning in chisq.test(vaccination): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  vaccination
## X-squared = 1.9966, df = 1, p-value = 0.1577

# →イエーツの補正をした場合でも、しない場合でも有意水準5%で有意にはならない。H_0は棄却されない。よってワクチン接種とインフルエンザの罹患に関係があるともないとも言えない。サンプルサイズが大きくなれば異なる結果が出るかもしれない。

3 章

# 3-3

# [,1]  Ozone   numeric Ozone (ppb)
# [,3]  Wind    numeric Wind (mph)
# A.風速が1mph上がるときオゾン量は-5.5509ppb変化する
res <- lm(formula = Ozone ~ Wind, data = airquality)

plot(airquality$Wind, airquality$Ozone)
abline(res)

summary(res)

## 
## Call:
## lm(formula = Ozone ~ Wind, data = airquality)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -51.572 -18.854  -4.868  15.234  90.000 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  96.8729     7.2387   13.38  < 2e-16 ***
## Wind         -5.5509     0.6904   -8.04 9.27e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.47 on 114 degrees of freedom
##   ( 37 個の観測値が欠損のため削除されました )
## Multiple R-squared:  0.3619, Adjusted R-squared:  0.3563 
## F-statistic: 64.64 on 1 and 114 DF,  p-value: 9.272e-13

# A.風速が1mph上がると、オゾン量は5.5509ppb低下する

# 3-4
# dplyrパッケージを読み込む
library(dplyr)


# [, 1] mpg Miles/(US) gallon
# [, 3] disp    Displacement (cu.in.)←エンジンの排気量
data <- select(mtcars, disp, mpg)

# 散布図をプロット
plot(data$disp, data$mpg, xlab = "disp", ylab = "mpg", main = "Scatter plot of mpg vs disp")

# 回帰モデルを作成し、回帰直線をプロット
res <- lm(mpg ~ disp, data = data)
abline(res, col = "red")

summary(res)

## 
## Call:
## lm(formula = mpg ~ disp, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8922 -2.2022 -0.9631  1.6272  7.2305 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 29.599855   1.229720  24.070  < 2e-16 ***
## disp        -0.041215   0.004712  -8.747 9.38e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.251 on 30 degrees of freedom
## Multiple R-squared:  0.7183, Adjusted R-squared:  0.709 
## F-statistic: 76.51 on 1 and 30 DF,  p-value: 9.38e-10

# A.切片、dispともに有意である

4 章

# 4-1
women

##    height weight
## 1      58    115
## 2      59    117
## 3      60    120
## 4      61    123
## 5      62    126
## 6      63    129
## 7      64    132
## 8      65    135
## 9      66    139
## 10     67    142
## 11     68    146
## 12     69    150
## 13     70    154
## 14     71    159
## 15     72    164

summary(women)

##      height         weight     
##  Min.   :58.0   Min.   :115.0  
##  1st Qu.:61.5   1st Qu.:124.5  
##  Median :65.0   Median :135.0  
##  Mean   :65.0   Mean   :136.7  
##  3rd Qu.:68.5   3rd Qu.:148.0  
##  Max.   :72.0   Max.   :164.0

res <- lm(height ~ weight, data = women)
res2 <- lm(height ~ poly(weight, 2, raw = TRUE), data = women)
summary(res)

## 
## Call:
## lm(formula = height ~ weight, data = women)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83233 -0.26249  0.08314  0.34353  0.49790 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.723456   1.043746   24.64 2.68e-12 ***
## weight       0.287249   0.007588   37.85 1.09e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.44 on 13 degrees of freedom
## Multiple R-squared:  0.991,  Adjusted R-squared:  0.9903 
## F-statistic:  1433 on 1 and 13 DF,  p-value: 1.091e-14

summary(res2)

## 
## Call:
## lm(formula = height ~ poly(weight, 2, raw = TRUE), data = women)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.105338 -0.035764 -0.004898  0.049430  0.141593 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                  -1.175e+01  1.720e+00   -6.83 1.82e-05 ***
## poly(weight, 2, raw = TRUE)1  8.343e-01  2.502e-02   33.35 3.36e-13 ***
## poly(weight, 2, raw = TRUE)2 -1.973e-03  9.014e-05  -21.89 4.84e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.07158 on 12 degrees of freedom
## Multiple R-squared:  0.9998, Adjusted R-squared:  0.9997 
## F-statistic: 2.732e+04 on 2 and 12 DF,  p-value: < 2.2e-16

plot(women$weight, women$height)
lines(women$weight, fitted(res))
lines(women$weight, fitted(res2))

# 4-4

5 章

# 5-1
# 5-2
# 5-3
# 5-4
# 5-5
# 5-6
# 5-7
# 5-8

6 章

# 6-1
population <- read.csv("population2.csv", header = FALSE)
population <- population$V1

ord <- order(population, decreasing = TRUE)
population_sorted <- population[ord]
n <- 1:length(population)
# (1)
plot(n, population_sorted, xlab = "順位n", ylab = "人口p", main = "人口データ")

# (2)
plot(log10(n), log10(population_sorted), xlab = "順位n", ylab = "人口p", main = "人口データ")

# (3)
res <- lm(log(population_sorted) ~ log(n))
summary(res)

## 
## Call:
## lm(formula = log(population_sorted) ~ log(n))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32082 -0.04313  0.01039  0.03162  0.68008 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.20698    0.09535   75.58   <2e-16 ***
## log(n)      -0.99683    0.03632  -27.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1663 on 28 degrees of freedom
## Multiple R-squared:  0.9642, Adjusted R-squared:  0.9629 
## F-statistic: 753.3 on 1 and 28 DF,  p-value: < 2.2e-16

# logp = 7.20698 - 0.99683logn
#    p = exp(7.20698) * exp(log(n^- 0.99683))
#    p = 1348.813 * n^- 0.99683

# (4)
res <- nls(population_sorted ~ c * n^alpha,
  start = c(c = exp(7.2), alpha = -1),
  trace = TRUE
)

## 468486.1    (3.92e-01): par = (1339.431 -1)
##       406223. (7.16e-03): par = (1543.547 -1.011995)
## 406204.6    (8.84e-04): par = (1542.441 -1.00901)
## 406204.3    (1.12e-04): par = (1542.723 -1.009395)
## 406204.3    (1.41e-05): par = (1542.687 -1.009347)
## 406204.3    (1.79e-06): par = (1542.692 -1.009353)

summary(res)

## 
## Formula: population_sorted ~ c * n^alpha
## 
## Parameters:
##         Estimate Std. Error t value Pr(>|t|)    
## c     1542.69172  113.22524   13.62 7.04e-14 ***
## alpha   -1.00935    0.08239  -12.25 9.13e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 120.4 on 28 degrees of freedom
## 
## Number of iterations to convergence: 5 
## Achieved convergence tolerance: 1.789e-06

# 6-2

# (1)

f <- function(x) {
  return(x^3 - 3 * x)
}

x <- seq(-2, 2, by = 0.1)
y <- f(x) + rnorm(length(x), 0, 0.2)

plot(x, y)
curve(f, add = TRUE)

# (2)
res <- lm(y ~ poly(x, 3, raw = TRUE))
# A.H_0(a):a=0,H_0(c):c=0は棄却されない。
summary(res)

## 
## Call:
## lm(formula = y ~ poly(x, 3, raw = TRUE))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39399 -0.11088  0.00486  0.08375  0.46685 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.10560    0.04542   2.325   0.0257 *  
## poly(x, 3, raw = TRUE)1 -3.01500    0.06408 -47.047   <2e-16 ***
## poly(x, 3, raw = TRUE)2 -0.03894    0.02419  -1.610   0.1160    
## poly(x, 3, raw = TRUE)3  1.01374    0.02333  43.443   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1938 on 37 degrees of freedom
## Multiple R-squared:  0.9836, Adjusted R-squared:  0.9823 
## F-statistic: 738.9 on 3 and 37 DF,  p-value: < 2.2e-16

# (2)
# 略

7 章

# 7-1
# 7-2
# 7-3
# 7-4
# 7-5
# 7-6
# 7-7
# 7-8

8 章

# 8-1
# 8-2
# 8-3
# 8-4
# 8-5
# 8-6
# 8-7
# 8-8

9 章

# 9-1
# 9-2
# 9-3
# 9-4
# 9-5
# 9-6
# 9-7
# 9-8

10 章

# 10-1
# 10-2
# 10-3
# 10-4
# 10-5
# 10-6
# 10-7
# 10-8

11 章

# 11-1
# 11-2
# 11-3
# 11-4
# 11-5
# 11-6
# 11-7
# 11-8

12 章

# 12-1
# 12-2
# 12-3
# 12-4
# 12-5
# 12-6
# 12-7
# 12-8

13 章

# 13-1
# 13-2
# 13-3
# 13-4
# 13-5
# 13-6
# 13-7
# 13-8

14 章

# 14-1
# 14-2
# 14-3
# 14-4
# 14-5
# 14-6
# 14-7
# 14-8

Rで学ぶ確率統計学多変量統計編

T_

1 章

2 章

3 章

4 章

5 章

6 章

7 章

8 章

9 章

10 章

11 章

12 章

13 章

14 章

Rで学ぶ確率統計学 多変量統計編

T_

1 章

2 章

3 章

4 章

5 章

6 章

7 章

8 章

9 章

10 章

11 章

12 章

13 章

14 章

Rで学ぶ確率統計学多変量統計編