## 'data.frame': 60 obs. of 3 variables:
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
## $ dose: num 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
table() 함수를 사용하여 교차표(Cross-Table) 확인하기
##
## 0.5 1 2
## OJ 10 10 10
## VC 10 10 10
describeBy()를 사용하여 Supplement 별 Length 와 Dosage 의 기초 통계량 확인
##
## Descriptive statistics by group
## group: OJ
## vars n mean sd median trimmed mad min max range skew kurtosis se
## len 1 30 20.66 6.61 22.7 21.04 5.49 8.2 30.9 22.7 -0.52 -1.03 1.21
## dose* 2 30 2.00 0.83 2.0 2.00 1.48 1.0 3.0 2.0 0.00 -1.60 0.15
## ------------------------------------------------------------
## group: VC
## vars n mean sd median trimmed mad min max range skew kurtosis se
## len 1 30 16.96 8.27 16.5 16.57 9.27 4.2 33.9 29.7 0.28 -0.93 1.51
## dose* 2 30 2.00 0.83 2.0 2.00 1.48 1.0 3.0 2.0 0.00 -1.60 0.15
정규성 확인하기 - 히스토그램(정규분포 곡선)과 Shapiro-Wilk 검정 사용하기
# Histogram of tooth length on the density scale, overlaid with the normal
# density curve implied by the sample mean and standard deviation of `len`.
# Used as a visual normality check.
ggplot(ToothGrowth, aes(x = len)) +
  geom_histogram(
    # after_stat(density) replaces the deprecated `..density..` notation.
    aes(y = after_stat(density)),
    # 30 is the value ggplot2 falls back to; stating it avoids the
    # "Pick better value with `binwidth`" message.
    bins = 30,
    color = "indianred2",
    fill = "white"
  ) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(ToothGrowth$len, na.rm = TRUE),
      sd = sd(ToothGrowth$len, na.rm = TRUE)
    )
  )
##
## Shapiro-Wilk normality test
##
## data: ToothGrowth$len
## W = 0.96743, p-value = 0.1091
Length VS Supplement
H0 : Supplement (2 종류)에 따라 치아 Length 길이의 차이가 없다.
H1 : Supplement (2 종류)에 따라 치아 Length 길이의 차이가 있다.
##
## Welch Two Sample t-test
##
## data: len by supp
## t = 1.9153, df = 55.309, p-value = 0.06063
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1710156 7.5710156
## sample estimates:
## mean in group OJ mean in group VC
## 20.66333 16.96333
Length vs Dosage - 0.5 vs 1.0
##
## Welch Two Sample t-test
##
## data: len by dose
## t = -6.4766, df = 37.986, p-value = 1.268e-07
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.983781 -6.276219
## sample estimates:
## mean in group 0.5 mean in group 1
## 10.605 19.735
Length vs Dosage - 1.0 vs 2.0
##
## Welch Two Sample t-test
##
## data: len by dose
## t = -4.9005, df = 37.101, p-value = 1.906e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.996481 -3.733519
## sample estimates:
## mean in group 1 mean in group 2
## 19.735 26.100
# Dosage and Supplement are both treated as categorical variables:
# one box per dose level, split (filled) by supplement type.
# factor(dose) makes the per-dose grouping explicit so the plot is correct even
# if `dose` is still numeric; it changes nothing when `dose` is already a
# factor. labs() keeps the original "dose" axis title.
ggplot(ToothGrowth, aes(x = factor(dose), y = len)) +
  geom_boxplot(aes(fill = supp)) +
  labs(x = "dose") +
  theme_grey()
두 집단의 평균 차이
Test 1 : Dosage 량이 0.5 이면서, Supplement 가 OJ, VC 인 두 그룹간의 평균 Length의 차이
# Give the columns descriptive names. `df1` is reused by the later
# per-dose comparisons.
df1 <- ToothGrowth %>%
  rename(
    Length = len,
    Supplement = supp,
    Dosage = dose
  )

# Test 1: OJ vs. VC at the 0.5 dose level.
# Dosage is compared against the string "0.5", exactly as before, so the filter
# selects the same rows whether Dosage is stored as a factor or as a number.
Group_A <- subset(df1, Supplement == "OJ" & Dosage == "0.5")
Group_B <- subset(df1, Supplement == "VC" & Dosage == "0.5")

# Two-sided, unpaired two-sample t-test on tooth length. With t.test()'s
# default var.equal = FALSE this is the Welch test shown in the output.
t.test(Group_A$Length, Group_B$Length, alternative = "two.sided", paired = FALSE)
##
## Welch Two Sample t-test
##
## data: Group_A$Length and Group_B$Length
## t = 3.1697, df = 14.969, p-value = 0.006359
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.719057 8.780943
## sample estimates:
## mean of x mean of y
## 13.23 7.98
Test 2 : Dosage “1.0”
# Test 2: OJ vs. VC at the 1.0 dose level.
# Dosage is compared against the string "1", exactly as before, so the filter
# selects the same rows whether Dosage is stored as a factor or as a number.
Group_A <- subset(df1, Supplement == "OJ" & Dosage == "1")
Group_B <- subset(df1, Supplement == "VC" & Dosage == "1")

# Two-sided, unpaired two-sample t-test on tooth length. With t.test()'s
# default var.equal = FALSE this is the Welch test shown in the output.
t.test(Group_A$Length, Group_B$Length, alternative = "two.sided", paired = FALSE)
##
## Welch Two Sample t-test
##
## data: Group_A$Length and Group_B$Length
## t = 4.0328, df = 15.358, p-value = 0.001038
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.802148 9.057852
## sample estimates:
## mean of x mean of y
## 22.70 16.77
# Test 3: OJ vs. VC at the 2.0 dose level.
# Dosage is compared against the string "2", exactly as before, so the filter
# selects the same rows whether Dosage is stored as a factor or as a number.
Group_A <- subset(df1, Supplement == "OJ" & Dosage == "2")
Group_B <- subset(df1, Supplement == "VC" & Dosage == "2")

# Two-sided, unpaired two-sample t-test on tooth length. With t.test()'s
# default var.equal = FALSE this is the Welch test shown in the output.
t.test(Group_A$Length, Group_B$Length, alternative = "two.sided", paired = FALSE)
##
## Welch Two Sample t-test
##
## data: Group_A$Length and Group_B$Length
## t = -0.046136, df = 14.04, p-value = 0.9639
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.79807 3.63807
## sample estimates:
## mean of x mean of y
## 26.06 26.14
| Test | Dosage | t | p-value | confidence interval (95%) | Comment |
|------|--------|---|---------|---------------------------|---------|
| A | 0.5 (mg) | 3.1697 | 0.0064 | 1.7191 to 8.7809 | Average tooth length is significantly different |
| B | 1.0 (mg) | 4.0328 | 0.0010 | 2.8021 to 9.0579 | Average tooth length is significantly different |
| C | 2.0 (mg) | -0.046 | 0.9639 | -3.798 to 3.6381 | At 2mg there is not a significant difference in avg. length |