ライブラリ
library(magrittr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
子どもの貧困問題に関する例題を子どものいる2人親世帯(724名)について行う. 2人親世帯(母子世帯以外)にデータを絞って以下の問に答えなさい. 全国の収入は架空の数字である. 以下では,t.test()を用いずに,t値を計算し,棄却域に入るかどうかを確認しなさい. また,p値についても報告すること.
# Load the survey data set from CSV into a tibble.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact location exists.
PY110 <- readr::read_csv(
  file = "/Users/myang/Desktop/23A/社会調査/社会調査/data/PY110.csv"
)
## Rows: 3367 Columns: 5377
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (5377): PanelID, BLOCK, SIZE, sex, ybirth, mbirth, ZQ02A, zq02ay48, zq02...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Derive the analysis variables from the raw survey codes.
#   gender: factor built from `sex` (1 = "Male", 2 = "Female").
#   income: ZQ47C category codes 1-13 mapped to numeric amounts; any other
#           code becomes NA. Presumably bracket midpoints -- confirm the
#           unit and brackets against the codebook.
#   marry : ZQ50 recoded to 1 for code 2 and 0 for codes 1, 3, 4; else NA.
#   child : ZQ14_3 recoded to 1 for code 1 and 0 for code 2; else NA.
# car::recode() is called with an explicit namespace because library(car)
# masks dplyr::recode().
PY110 <- dplyr::mutate(
  PY110,
  gender = factor(sex, levels = 1:2, labels = c("Male", "Female")),
  income = car::recode(
    ZQ47C,
    "1=0;2=12.5;3=50;4=100;5=200;
6=300;7=400;8=500;9=700;10=1000;
11=1500;12=2000;13=2250;else=NA"
  ),
  marry = car::recode(ZQ50, "c(1,3,4)=0;2=1;else=NA"),
  child = car::recode(ZQ14_3, "1=1;2=0;else=NA")
)
# Build the analysis sample in two steps:
#   f                 : female respondents with child == 1, keeping only the
#                       columns needed below.
#   f_familywithchild : the subset of `f` with marry == 1.
# Rows where a condition evaluates to NA are dropped by dplyr::filter().
# NOTE(review): the problem statement speaks of 724 two-parent households,
# but restricting to gender == "Female" leaves 386 non-missing incomes --
# confirm whether the gender restriction is intended here.
f <- PY110 %>%
  dplyr::select(PanelID, gender, income, marry, child) %>%
  dplyr::filter(child == 1, gender == "Female")

f_familywithchild <- dplyr::filter(f, marry == 1)
# Sample mean, sample standard deviation and number of non-missing income
# values; missing incomes are excluded from all three.
# NOTE(review): the object named `sd` shadows the name of stats::sd(). Calls
# such as sd(x) still resolve to the function, but a distinct name would be
# clearer; it is kept because later chunks read `sd`.
x_bar <- mean(f_familywithchild[["income"]], na.rm = TRUE)
sd <- stats::sd(f_familywithchild[["income"]], na.rm = TRUE)
n <- sum(!is.na(f_familywithchild[["income"]]))

# Print the three statistics as a labelled list.
list("サンプルサイズ" = n, "平均" = x_bar, "標準偏差" = sd)
## $サンプルサイズ
## [1] 386
##
## $平均
## [1] 557.6425
##
## $標準偏差
## [1] 289.2218
# Cross-check of the statistics above, computed in one dplyr::summarise()
# call: count of non-missing incomes, their mean and standard deviation.
dplyr::summarise(
  f_familywithchild,
  N = sum(!is.na(income)),
  Mean = mean(income, na.rm = TRUE),
  SD = stats::sd(income, na.rm = TRUE)
)
## # A tibble: 1 × 3
## N Mean SD
## <int> <dbl> <dbl>
## 1 386 558. 289.
臨界値
# Upper 2.5% point of the t distribution with n - 1 degrees of freedom,
# i.e. the multiplier for a two-sided 95% interval.
tval <- qt(p = 0.975, df = n - 1)
print(tval)
## [1] 1.966145
信頼区間
# Confidence limits for the mean: x_bar -/+ tval * sd / sqrt(n).
ci_half_width <- tval * sd / sqrt(n)
ll <- x_bar - ci_half_width
ul <- x_bar + ci_half_width
c(ll, ul)
## [1] 528.6989 586.5861
528.6989 ≤ μ ≤ 586.5861
# One-sample t statistic for the null hypothesis mu = mu0:
#   t = (x_bar - mu0) / (sd / sqrt(n))
# NOTE(review): the object `t` shadows the name of base::t(); it is kept
# because the following chunks read `t`.
mu0 <- 530
std_err <- sd / sqrt(n)
t <- (x_bar - mu0) / std_err
臨界値
# Two-sided critical values at the 5% level: the lower and upper 2.5% points
# of the t distribution with n - 1 degrees of freedom.
lc <- qt(p = 0.025, df = n - 1)
uc <- qt(p = 0.975, df = n - 1)
t値が棄却域に入るか
# TRUE when t falls in the two-sided rejection region (below lc or above uc).
t < lc || t > uc
## [1] FALSE
p値
# Two-sided p-value: the probability, under H0, of a t statistic at least as
# extreme as the observed one in either tail, P(|T| >= |t|).
# By symmetry of the t distribution this is twice the upper-tail area beyond
# |t|; abs() makes the expression valid for negative t as well.
# The previous expression, pt(t) + 1 - pt(-t), added the two central masses
# instead of the two tails and returned a value above 1 for positive t.
2 * pt(abs(t), df = n - 1, lower.tail = FALSE)
# Expected result: about 0.0612, twice the one-sided p-value 0.03058495
# reported at the end of this section. Re-knit to regenerate the exact
# console output.
# One-sided (upper-tail) critical value at the 5% level. This overwrites the
# two-sided `uc` computed earlier.
uc <- qt(p = 0.95, df = n - 1)
t値が棄却域に入るか
# TRUE when t exceeds the one-sided critical value (upper-tail rejection).
t > uc
## [1] TRUE
p値
# One-sided p-value: upper-tail probability P(T > t) with n - 1 degrees of
# freedom.
pt(q = t, df = n - 1, lower.tail = FALSE)
## [1] 0.03058495