# 1-1
print(paste(556*(9/16),556*(3/16),556*(3/16),556*(1/16)))
## [1] "312.75 104.25 104.25 34.75"
# 1-2
p<-c(9,3,3,1)/16
N<-c(315,101,108,32)
# A.P値は0.9254であり、H_0は棄却されない
chisq.test(N,p=p)
##
## Chi-squared test for given probabilities
##
## data: N
## X-squared = 0.47002, df = 3, p-value = 0.9254
# 1-3
p<-c(38,31,22,9)/100
N<-c(15,9,2,1)
# A.P値は0.1276であり、H_0は棄却されない
chisq.test(N,p=p)
## Warning in chisq.test(N, p = p): Chi-squared approximation may be incorrect
##
## Chi-squared test for given probabilities
##
## data: N
## X-squared = 5.6922, df = 3, p-value = 0.1276
# A.P値は2.671e-12であり、H_0は棄却される
N<-c(150,90,20,10)
chisq.test(N,p=p)
##
## Chi-squared test for given probabilities
##
## data: N
## X-squared = 56.922, df = 3, p-value = 2.671e-12
# 1-4
p<-c(1,1,1,1,1,1,1,1,1,1,1,1)/12
N<-c(318,245,282,270,253,235,280,296,279,338,326,410)
# A.月毎の交通事故死者数には違いがある
chisq.test(N,p=p)
##
## Chi-squared test for given probabilities
##
## data: N
## X-squared = 87.311, df = 11, p-value = 5.597e-14
# 1-6 書籍より
x1<-rnorm(10000)
x2<-rnorm(10000)
x3<-rnorm(10000)
m<-(x1+x2+x3)/3
A<-(x1-m)**2+(x2-m)**2+(x3-m)**2
B<-x1**2+x2**2+x3**2
hist(A,prob=TRUE)
curve(dchisq(x,df=2),add=TRUE)
hist(B,prob=TRUE)
curve(dchisq(x,df=3),add=TRUE)
\(参考:自由度11のχ二乗分布\)
library(ggplot2)
x <- seq(0, 30, 0.1) # x軸の範囲を設定
y <- dchisq(x, df = 11) # 自由度11のχ二乗分布の確率密度関数を計算
data <- data.frame(x = x, y = y)
ggplot(data, aes(x = x, y = y)) +
geom_line() +
xlab("x") + ylab("Probability Density") +
ggtitle("Chi-squared Distribution (df = 11)")
# 2-1
N <- c(48, 52, 59, 71, 64, 50, 35, 49)
p <- c(1, 1, 1, 1, 1, 1, 1, 1) / 8
# H_0:月の満ち欠けと出産数が独立であるという帰無仮説は棄却される
res <- chisq.test(N, p = p)
# 月相4-5が特異的に多く、7-8が特異的に少ない
round(pnorm(abs(res$stdres), lower.tail = FALSE) * 2, 4)
## [1] 0.4215 0.8265 0.4215 0.0105 0.1249 0.6090 0.0069 0.5107
bp <- barplot(N, names.arg = c("1 to 2", "2 to 3", "3 to 4", "4 to 5", "5 to 6", "6 to 7", "7 to 8", "8 to 1"), xlab = "moon phase", ylab = "number of deliveries")
# text(x=bp,y=N,labels = N,pos=3,xpd=NA)
# 2-2
# (1)
pass.fail <- matrix(c(163, 218, 57, 48), 2, 2)
# (2)
res <- chisq.test(pass.fail)
round(pnorm(abs(res$stdres), lower.tail = FALSE) * 2, 4)
## [,1] [,2]
## [1,] 0.036 0.036
## [2,] 0.036 0.036
# →男子の方が合格しやすいと言える。
# 2-3
# H_0:サリドマイドと奇形の有無は関係がない
thalidomide <- matrix(c(90,22,2,186), 2, 2)
chisq.test(thalidomide, correct=FALSE)
##
## Pearson's Chi-squared test
##
## data: thalidomide
## X-squared = 207.55, df = 1, p-value < 2.2e-16
chisq.test(thalidomide)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: thalidomide
## X-squared = 203.84, df = 1, p-value < 2.2e-16
# →H_0:サリドマイドと奇形の有無は関係がないが棄却される
# 2-4
# H_0:ワクチン接種とインフルエンザの罹患には関係がない
vaccination <- matrix(c(1,15,22,62), 2, 2,
dimnames = list(c("接種あり", "接種なし"), c("罹患", "罹患でない")))
print(vaccination)
## 罹患 罹患でない
## 接種あり 1 22
## 接種なし 15 62
chisq.test(vaccination, correct=FALSE)
## Warning in chisq.test(vaccination, correct = FALSE): Chi-squared approximation
## may be incorrect
##
## Pearson's Chi-squared test
##
## data: vaccination
## X-squared = 3.0175, df = 1, p-value = 0.08237
chisq.test(vaccination)
## Warning in chisq.test(vaccination): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: vaccination
## X-squared = 1.9966, df = 1, p-value = 0.1577
# →イエーツの補正をした場合でも、しない場合でも有意水準5%で有意にはならない。H_0は棄却されない。よってワクチン接種とインフルエンザの罹患に関係があるともないとも言えない。サンプルサイズが大きくなれば異なる結果が出るかもしれない。
# 3-3
# [,1] Ozone numeric Ozone (ppb)
# [,3] Wind numeric Wind (mph)
# A.風速が1mph上がるときオゾン量は-5.5509ppb変化する
res <- lm(formula = Ozone ~ Wind, data = airquality)
plot(airquality$Wind, airquality$Ozone)
abline(res)
summary(res)
##
## Call:
## lm(formula = Ozone ~ Wind, data = airquality)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.572 -18.854 -4.868 15.234 90.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 96.8729 7.2387 13.38 < 2e-16 ***
## Wind -5.5509 0.6904 -8.04 9.27e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 26.47 on 114 degrees of freedom
## ( 37 個の観測値が欠損のため削除されました )
## Multiple R-squared: 0.3619, Adjusted R-squared: 0.3563
## F-statistic: 64.64 on 1 and 114 DF, p-value: 9.272e-13
# A.風速が1mph上がると、オゾン量は5.5509ppb低下する
# 3-4
# dplyrパッケージを読み込む
library(dplyr)
# [, 1] mpg Miles/(US) gallon
# [, 3] disp Displacement (cu.in.)←エンジンの排気量
data <- select(mtcars, disp, mpg)
# 散布図をプロット
plot(data$disp, data$mpg, xlab = "disp", ylab = "mpg", main = "Scatter plot of mpg vs disp")
# 回帰モデルを作成し、回帰直線をプロット
res <- lm(mpg ~ disp, data = data)
abline(res, col = "red")
summary(res)
##
## Call:
## lm(formula = mpg ~ disp, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8922 -2.2022 -0.9631 1.6272 7.2305
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.599855 1.229720 24.070 < 2e-16 ***
## disp -0.041215 0.004712 -8.747 9.38e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.251 on 30 degrees of freedom
## Multiple R-squared: 0.7183, Adjusted R-squared: 0.709
## F-statistic: 76.51 on 1 and 30 DF, p-value: 9.38e-10
# A.切片、dispともに有意である
# 4-1
women
## height weight
## 1 58 115
## 2 59 117
## 3 60 120
## 4 61 123
## 5 62 126
## 6 63 129
## 7 64 132
## 8 65 135
## 9 66 139
## 10 67 142
## 11 68 146
## 12 69 150
## 13 70 154
## 14 71 159
## 15 72 164
summary(women)
## height weight
## Min. :58.0 Min. :115.0
## 1st Qu.:61.5 1st Qu.:124.5
## Median :65.0 Median :135.0
## Mean :65.0 Mean :136.7
## 3rd Qu.:68.5 3rd Qu.:148.0
## Max. :72.0 Max. :164.0
res <- lm(height ~ weight, data = women)
res2 <- lm(height ~ poly(weight, 2, raw = TRUE), data = women)
summary(res)
##
## Call:
## lm(formula = height ~ weight, data = women)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.83233 -0.26249 0.08314 0.34353 0.49790
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.723456 1.043746 24.64 2.68e-12 ***
## weight 0.287249 0.007588 37.85 1.09e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.44 on 13 degrees of freedom
## Multiple R-squared: 0.991, Adjusted R-squared: 0.9903
## F-statistic: 1433 on 1 and 13 DF, p-value: 1.091e-14
summary(res2)
##
## Call:
## lm(formula = height ~ poly(weight, 2, raw = TRUE), data = women)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.105338 -0.035764 -0.004898 0.049430 0.141593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.175e+01 1.720e+00 -6.83 1.82e-05 ***
## poly(weight, 2, raw = TRUE)1 8.343e-01 2.502e-02 33.35 3.36e-13 ***
## poly(weight, 2, raw = TRUE)2 -1.973e-03 9.014e-05 -21.89 4.84e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07158 on 12 degrees of freedom
## Multiple R-squared: 0.9998, Adjusted R-squared: 0.9997
## F-statistic: 2.732e+04 on 2 and 12 DF, p-value: < 2.2e-16
plot(women$weight, women$height)
lines(women$weight, fitted(res))
lines(women$weight, fitted(res2))
# 4-4
# 5-1
# 5-2
# 5-3
# 5-4
# 5-5
# 5-6
# 5-7
# 5-8
# 6-1
population <- read.csv("population2.csv", header = FALSE)
population <- population$V1
ord <- order(population, decreasing = TRUE)
population_sorted <- population[ord]
n <- 1:length(population)
# (1)
plot(n, population_sorted, xlab = "順位n", ylab = "人口p", main = "人口データ")
# (2)
plot(log10(n), log10(population_sorted), xlab = "順位n", ylab = "人口p", main = "人口データ")
# (3)
res <- lm(log(population_sorted) ~ log(n))
summary(res)
##
## Call:
## lm(formula = log(population_sorted) ~ log(n))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32082 -0.04313 0.01039 0.03162 0.68008
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.20698 0.09535 75.58 <2e-16 ***
## log(n) -0.99683 0.03632 -27.45 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1663 on 28 degrees of freedom
## Multiple R-squared: 0.9642, Adjusted R-squared: 0.9629
## F-statistic: 753.3 on 1 and 28 DF, p-value: < 2.2e-16
# logp = 7.20698 - 0.99683logn
# p = exp(7.20698) * exp(log(n^- 0.99683))
# p = 1348.813 * n^- 0.99683
# (4)
res <- nls(population_sorted ~ c * n^alpha,
start = c(c = exp(7.2), alpha = -1),
trace = TRUE
)
## 468486.1 (3.92e-01): par = (1339.431 -1)
## 406223. (7.16e-03): par = (1543.547 -1.011995)
## 406204.6 (8.84e-04): par = (1542.441 -1.00901)
## 406204.3 (1.12e-04): par = (1542.723 -1.009395)
## 406204.3 (1.41e-05): par = (1542.687 -1.009347)
## 406204.3 (1.79e-06): par = (1542.692 -1.009353)
summary(res)
##
## Formula: population_sorted ~ c * n^alpha
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## c 1542.69172 113.22524 13.62 7.04e-14 ***
## alpha -1.00935 0.08239 -12.25 9.13e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 120.4 on 28 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 1.789e-06
# 6-2
# (1)
f <- function(x) {
return(x^3 - 3 * x)
}
x <- seq(-2, 2, by = 0.1)
y <- f(x) + rnorm(length(x), 0, 0.2)
plot(x, y)
curve(f, add = TRUE)
# (2)
res <- lm(y ~ poly(x, 3, raw = TRUE))
# A.H_0(a):a=0,H_0(c):c=0は棄却されない。
summary(res)
##
## Call:
## lm(formula = y ~ poly(x, 3, raw = TRUE))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.39399 -0.11088 0.00486 0.08375 0.46685
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.10560 0.04542 2.325 0.0257 *
## poly(x, 3, raw = TRUE)1 -3.01500 0.06408 -47.047 <2e-16 ***
## poly(x, 3, raw = TRUE)2 -0.03894 0.02419 -1.610 0.1160
## poly(x, 3, raw = TRUE)3 1.01374 0.02333 43.443 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1938 on 37 degrees of freedom
## Multiple R-squared: 0.9836, Adjusted R-squared: 0.9823
## F-statistic: 738.9 on 3 and 37 DF, p-value: < 2.2e-16
# (2)
# 略
# 7-1
# 7-2
# 7-3
# 7-4
# 7-5
# 7-6
# 7-7
# 7-8
# 8-1
# 8-2
# 8-3
# 8-4
# 8-5
# 8-6
# 8-7
# 8-8
# 9-1
# 9-2
# 9-3
# 9-4
# 9-5
# 9-6
# 9-7
# 9-8
# 10-1
# 10-2
# 10-3
# 10-4
# 10-5
# 10-6
# 10-7
# 10-8
# 11-1
# 11-2
# 11-3
# 11-4
# 11-5
# 11-6
# 11-7
# 11-8
# 12-1
# 12-2
# 12-3
# 12-4
# 12-5
# 12-6
# 12-7
# 12-8
# 13-1
# 13-2
# 13-3
# 13-4
# 13-5
# 13-6
# 13-7
# 13-8
# 14-1
# 14-2
# 14-3
# 14-4
# 14-5
# 14-6
# 14-7
# 14-8