# NOTE: the workspace is intentionally NOT wiped here. Run this script in a
# fresh R session instead of calling rm(list = ls()), which deletes the
# caller's objects while leaving attached packages and options untouched.

# 1. Load the data (built-in dataset)
data("chickwts")
df <- chickwts
head(df)
##   weight      feed
## 1    179 horsebean
## 2    160 horsebean
## 3    136 horsebean
## 4    227 horsebean
## 5    217 horsebean
## 6    168 horsebean
summary(df)
##      weight             feed   
##  Min.   :108.0   casein   :12  
##  1st Qu.:204.5   horsebean:10  
##  Median :258.0   linseed  :12  
##  Mean   :261.3   meatmeal :11  
##  3rd Qu.:323.5   soybean  :14  
##  Max.   :423.0   sunflower:12
# Q: Is weight right-skewed or left-skewed?
# A: mean (261.3) > median (258.0), which suggests a right-skewed distribution.

# Q: Is feed a categorical variable? Yes (summary() reports per-level counts).
# Q: How many observations (sample size) does chickwts have?
#    -> 12 + 10 + 12 + 11 + 14 + 12 = 71

chickwts$weight # -> prints only the values
##  [1] 179 160 136 227 217 168 108 124 143 140 309 229 181 141 260 203 148 169 213
## [20] 257 244 271 243 230 248 327 329 250 193 271 316 267 199 171 158 248 423 340
## [39] 392 339 341 226 320 295 334 322 297 318 325 257 303 315 380 153 263 242 206
## [58] 344 258 368 390 379 260 404 318 352 359 216 222 283 332
# 2. Summarize the chickwts$weight vector
summary(chickwts$weight)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   108.0   204.5   258.0   261.3   323.5   423.0
length(chickwts$weight)  # check the number of observations
## [1] 71
# 3. One-sample t-test: is the mean equal to 260? (two-sided test)
t.test(chickwts$weight, mu = 260)
## 
##  One Sample t-test
## 
## data:  chickwts$weight
## t = 0.14137, df = 70, p-value = 0.888
## alternative hypothesis: true mean is not equal to 260
## 95 percent confidence interval:
##  242.8301 279.7896
## sample estimates:
## mean of x 
##  261.3099
# mu: the hypothesized population mean

# Interpreting the result
# - Null hypothesis (H0): mean weight = 260g
# - Alternative hypothesis (H1): mean weight != 260g
# - Reject H0 when p-value < 0.05 (significant difference from 260)
# - Here p-value = 0.888 > 0.05, so H0 is not rejected; the 95% confidence
#   interval (242.83, 279.79) contains 260.


# Two-sided test example: does the mean extra sleep of group 1 differ
# significantly from 1.5?
# 1. Load the sleep data
data("sleep")
head(sleep)
##   extra group ID
## 1   0.7     1  1
## 2  -1.6     1  2
## 3  -0.2     1  3
## 4  -1.2     1  4
## 5  -0.1     1  5
## 6   3.4     1  6
summary(sleep)
##      extra        group        ID   
##  Min.   :-1.600   1:10   1      :2  
##  1st Qu.:-0.025   2:10   2      :2  
##  Median : 0.950          3      :2  
##  Mean   : 1.540          4      :2  
##  3rd Qu.: 3.400          5      :2  
##  Max.   : 5.500          6      :2  
##                          (Other):8
# extra: mean > median, which suggests a right-skewed distribution
# group is a categorical variable
# sample size is 10 + 10 = 20


# 2. Keep only the group 1 measurements
group1 <- with(sleep, extra[group == 1])
# 3. One-sample t-test against a hypothesized population mean of 1.5
t.test(group1, mu = 1.5)
## 
##  One Sample t-test
## 
## data:  group1
## t = -1.3257, df = 9, p-value = 0.2176
## alternative hypothesis: true mean is not equal to 1.5
## 95 percent confidence interval:
##  -0.5297804  2.0297804
## sample estimates:
## mean of x 
##      0.75
# Null hypothesis: the mean weight of the ctrl group is 5.0.
# 1. Load the data
data(PlantGrowth)
head(PlantGrowth)
##   weight group
## 1   4.17  ctrl
## 2   5.58  ctrl
## 3   5.18  ctrl
## 4   6.11  ctrl
## 5   4.50  ctrl
## 6   4.61  ctrl
summary(PlantGrowth)
##      weight       group   
##  Min.   :3.590   ctrl:10  
##  1st Qu.:4.550   trt1:10  
##  Median :5.155   trt2:10  
##  Mean   :5.073            
##  3rd Qu.:5.530            
##  Max.   :6.310
# 2. Pull out the weights of the ctrl group
ctrl <- PlantGrowth$weight[PlantGrowth$group == "ctrl"]
# 3. One-sample t-test (mu = 5)
t.test(ctrl, mu = 5)
## 
##  One Sample t-test
## 
## data:  ctrl
## t = 0.17355, df = 9, p-value = 0.8661
## alternative hypothesis: true mean is not equal to 5
## 95 percent confidence interval:
##  4.614882 5.449118
## sample estimates:
## mean of x 
##     5.032
data(PlantGrowth)
# Control group vs. treatment group 1
group_ctrl <- with(PlantGrowth, weight[group == "ctrl"])
group_trt1 <- with(PlantGrowth, weight[group == "trt1"])
# Independent two-sample t-test; var.equal = TRUE requests the pooled-variance
# (Student) version, hence df = 18 in the output below
t.test(group_ctrl, group_trt1, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  group_ctrl and group_trt1
## t = 1.1913, df = 18, p-value = 0.249
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2833003  1.0253003
## sample estimates:
## mean of x mean of y 
##     5.032     4.661
# 1. Load the data
data(sleep)
# 2. Inspect the data structure
str(sleep)
## 'data.frame':    20 obs. of  3 variables:
##  $ extra: num  0.7 -1.6 -0.2 -1.2 -0.1 3.4 3.7 0.8 0 2 ...
##  $ group: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ID   : Factor w/ 10 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
# NOTE: an earlier hand-copied listing kept here showed ID as
# `int 1 2 ... 10 1 2 ... 10`; the recorded str() output above shows that
# ID is a factor with 10 levels and group is a factor with 2 levels
# (20 observations, 3 variables).
# 3. Run the independent two-sample t-test (formula interface: extra by group,
#    pooled variance because var.equal = TRUE)
t_test_result <- t.test(extra ~ group, data = sleep, var.equal = TRUE)
# 4. Print the result
print(t_test_result)
## 
##  Two Sample t-test
## 
## data:  extra by group
## t = -1.8608, df = 18, p-value = 0.07919
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -3.363874  0.203874
## sample estimates:
## mean in group 1 mean in group 2 
##            0.75            2.33
# sleep data: compare group 1 with group 2
group1 <- sleep[sleep$group == 1, "extra"]
group2 <- sleep[sleep$group == 2, "extra"]

# Paired t-test (paired = TRUE). The two vectors are matched element by
# element, so this presumably relies on both groups listing subjects in the
# same ID order -- verify if the data are ever reordered.
t.test(group1, group2, paired = TRUE)
## 
##  Paired t-test
## 
## data:  group1 and group2
## t = -4.0621, df = 9, p-value = 0.002833
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -2.4598858 -0.7001142
## sample estimates:
## mean difference 
##           -1.58