##################################################################################################
# ref : http://www.dodomira.com/2016/04/02/r%EC%9D%84-%EC%82%AC%EC%9A%A9%ED%95%9C-t-test/        #
# t-test                                                                                         #
##################################################################################################
# 1. 두개의 집단에 대한 t-test 를 실시하기 위한 조건 : 등분산성 , 정규성 
a = c(175, 168, 168, 190, 156, 181, 182, 175, 174, 179)
b = c(185, 169, 173, 173, 188, 186, 175, 174, 179, 180)
c = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
#### p-value 가 0.05 보다 크다면, 귀무가설 지지, 귀무가설은 등분산하다 임  
#### p-value 가 0.05 보다 작다면, 귀무가설 기각, 대립가설은 두 집단이 유의미하게 다른 분산을 가짐  
var.test(a,b) # p-value = 0.2834

    F test to compare two variances

data:  a and b
F = 2.1028, num df = 9, denom df = 9, p-value = 0.2834
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.5223017 8.4657950
sample estimates:
ratio of variances 
          2.102784 
var.test(a,c) # p-value = 0.002512

    F test to compare two variances

data:  a and c
F = 9.5224, num df = 9, denom df = 9, p-value = 0.002512
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
  2.365235 38.337224
sample estimates:
ratio of variances 
          9.522424 
qf(.05, length(a) -1, length(b) -1)
[1] 0.3145749
# 스팸밴드 중 스팸유저의 수가 3명, 4명인 집단간의 스팸메세지수의 분산은 등분산이 아니다.
# var.test(filter(cv, usr_cnt == 3)$msg_cnt, filter(cv, usr_cnt == 4)$msg_cnt)  # p-vlaue = 2.826e-05
# 스팸밴드 중 스팸 문구가 3개인 집단과 4개인 집단간의, 스팸메세지수의 분산은 등분산이다.
# var.test(filter(cv, content_cnt == 3)$msg_cnt, filter(cv, content_cnt == 4)$msg_cnt)  # p-value = 0.1217
# 2. independent two sample t-test
#### 서로 다른 두개의 그룹 간 평균의 차이가 유의미 한지 여부를 판단하기 위한 t-test는 독립표본 t-test
#### 수동/자동에 따른 mpg 의 차이가 통계적으로 유의미한가?
automatic_cars_mpg <- mtcars[mtcars$am==1,1 ]
manual_cars_mpg <- mtcars[mtcars$am==0, 1]
#### check first : 등분산성?
var.test(automatic_cars_mpg, manual_cars_mpg) # p-value = 0.06691 > 0.5 , so.. 등분산 

    F test to compare two variances

data:  automatic_cars_mpg and manual_cars_mpg
F = 2.5869, num df = 12, denom df = 18, p-value = 0.06691
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
 0.934280 8.040391
sample estimates:
ratio of variances 
          2.586911 
#### independent two sample t-test 유형 1 문법: t.test(group 1의 관측치, group2의 관측치, t-test 유형, 신뢰범위)
t.test(automatic_cars_mpg, manual_cars_mpg,  paired = FALSE, var.equal = TRUE, conf.level = 0.95)   # p-value : 0.000285 < 0.5 
#### independent two sample t-test 유형 2 문법: t.test(관측치~집단 구분 기준, 데이터프레임, t-test 유형, 신뢰범위)
#### 오토 / 수동 자동차의 mpg 차이는 통계적으로 유의미함 
t.test(mpg ~ am, data = mtcars, var.equal = T, conf.level = .95)   # p-value : 0.000285 < 0.05 
# 3. Paired Sample test
#### 동일한 집단의 전-후 차이 비교 
#### ex> 중간고사 이후 과외를 받은 학생의 점수 변화 
mid = c(16, 20, 21, 22, 23, 22, 27, 25, 27, 28)
final = c(19, 22, 24, 24, 25, 25, 26, 26, 28, 32)
t.test(mid,final, paired=TRUE)   # p-value : 0.001 < 0.5 
# 4. 단일 표본 t-test ( One sample test )
#### 문법: t.test(관측치, alternative = 판별 방향, mu=특정기준, conf.level = 신뢰수준)
#### 학생들의 기말고사 점수가 24점보다 유의하게 높은가?
t.test(final, alternative = "greater", mu = 24, conf.level = .95)  # p-value : 0.1696 > 0.05 , so 유의미하게 높지는 않다. ( 신뢰도 95% 수준 )
t.test(final, alternative = "greater", mu = 23, conf.level = .95)  # p-value : 0.0430 < 0.05 , so 유의미하게 높다. ( 신뢰도 95% 수준 )
t.test(final, alternative = "less", mu = 30, conf.level = .95)     # p-value : 0.0007 < 0.05 , so 유의미하게 낮다. ( 신뢰도 95% 수준 )
library(BSDA)
z.test(final, y = NULL, alternative = "greater", mu = 23, sigma.x = sd(final), sigma.y = NULL, conf.level = 0.95) # p-value : 0.027 
# 번외편. t-statistic formula 와 t-distribution
# Independent Samples Ttest : http://lap.umd.edu/psyc200/handouts/psyc200_0812.pdf
# sample size = 10 , df = 9 인 두개의 샘플 집단을 생각해보자.
size_A <- 10
size_B <- 100
sample_A <- rnorm(size_A)
sample_B <- rnorm(size_B)
# 아래 공식에 따라 t-통계량은 아래와 같이 계산된다. 
print((mean(sample_A)-mean(sample_B)) / sqrt(sd(sample_A)**2/size_A + sd(sample_B)**2/size_B))
[1] -3.690621
print(t.test(sample_A,sample_B)$statistic)
        t 
-3.690621 
# t-statistic 값을 df = 9 에서 1000회 반복해서 구해보자 
tstats = replicate(1000,t.test(rnorm(size_A),rnorm(size_B))$statistic)
range(tstats)
[1] -4.518624  4.709766
# df = 9 에서의 t-distribution 을 그리고, 1000회 반복하여 수행한 t-통계량의 density plot 을 겹쳐보자 
pts = seq(-4.5, 4.5,length=100)
plot(pts,dt(pts, df=size_A+size_B-2), col='red',type='l') # df = df_of_sample_A + df_of_sample_B
lines(density(tstats))

# 위 과정을 반복 수행해보면, t-statistic 값의 확률밀도가, The Student t Distribution 에 근사한다. ( 공식에 따른 값이니까;;;; )
pts = seq(-4.5, 4.5,length=100)
plot(pts,dt(pts, df=size_A + size_B - 2), col='red',type='l') # df = df_of_sample_A + df_of_sample_B
for(i in 0:10) {
  lines(density(replicate(1000,t.test(rnorm(10),rnorm(10))$statistic)))
}

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjCiMgcmVmIDogaHR0cDovL3d3dy5kb2RvbWlyYS5jb20vMjAxNi8wNC8wMi9yJUVDJTlEJTg0LSVFQyU4MiVBQyVFQyU5QSVBOSVFRCU5NSU5Qy10LXRlc3QvICAgICAgICAjCiMgdC10ZXN0ICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAjCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjCgojIDEuIOuRkOqwnOydmCDsp5Hri6jsl5Ag64yA7ZWcIHQtdGVzdCDrpbwg7Iuk7Iuc7ZWY6riwIOychO2VnCDsobDqsbQgOiDrk7HrtoTsgrDshLEgLCDsoJXqt5zshLEgCmEgPSBjKDE3NSwgMTY4LCAxNjgsIDE5MCwgMTU2LCAxODEsIDE4MiwgMTc1LCAxNzQsIDE3OSkKYiA9IGMoMTg1LCAxNjksIDE3MywgMTczLCAxODgsIDE4NiwgMTc1LCAxNzQsIDE3OSwgMTgwKQoKYyA9IGMoMSwgMiwgMywgNCwgNSwgNiwgNywgOCwgOSwgMTApCgojIyMjIHAtdmFsdWUg6rCAIDAuMDUg67O064ukIO2BrOuLpOuptCwg6reA66y06rCA7ISkIOyngOyngCwg6reA66y06rCA7ISk7J2AIOuTseu2hOyCsO2VmOuLpCDsnoQgIAojIyMjIHAtdmFsdWUg6rCAIDAuMDUg67O064ukIOyekeuLpOuptCwg6reA66y06rCA7ISkIOq4sOqwgSwg64yA66a96rCA7ISk7J2AIOuRkCDsp5Hri6jsnbQg7Jyg7J2Y66+47ZWY6rKMIOuLpOuluCDrtoTsgrDsnYQg6rCA7KeQICAKCgoKdmFyLnRlc3QoYSxiKSAjIHAtdmFsdWUgPSAwLjI4MzQgPiAwLjA1CnZhci50ZXN0KGEsYykgIyBwLXZhbHVlID0gMC4wMDI1MTIgPCAwLjA1CnFmKC4wNSwgbGVuZ3RoKGEpIC0xLCBsZW5ndGgoYikgLTEpCmBgYAoKYGBge3J9CiMg7Iqk7Yy467C065OcIOykkSDsiqTtjLjsnKDsoIDsnZgg7IiY6rCAIDPrqoUsIDTrqoXsnbgg7KeR64uo6rCE7J2YIOyKpO2MuOuplOyEuOyngOyImOydmCDrtoTsgrDsnYAg65Ox67aE7IKw7J20IOyVhOuLiOuLpC4KIyB2YXIudGVzdChmaWx0ZXIoY3YsIHVzcl9jbnQgPT0gMykkbXNnX2NudCwgZmlsdGVyKGN2LCB1c3JfY250ID09IDQpJG1zZ19jbnQpICAjIHAtdmxhdWUgPSAyLjgyNmUtMDUKCiMg7Iqk7Yy467C065OcIOykkSDsiqTtjLgg66y46rWs6rCAIDPqsJzsnbgg7KeR64uo6rO8IDTqsJzsnbgg7KeR64uo6rCE7J2YLCDsiqTtjLjrqZTshLjsp4DsiJjsnZgg67aE7IKw7J2AIOuTseu2hOyCsOydtOuLpC4KIyB2YXIudGVzdChmaWx0ZXIoY3YsIGNvbnRlbnRfY250ID09IDMpJG1zZ19jbnQsIGZpbHRlcihjdiwgY29udGVudF9jbnQgPT0gNCkkbXNnX2NudCkgICMgcC12YWx1ZSA9IDAuMTIxNwoKIyAyLiBpbmRlcGVuZGVudCB0d28gc2FtcGxlIHQtdGVzdAojIyMjIOyEnOuhnCDri6Trpbgg65GQ6rCc7J2YIOq3uOujuSDqsIQg7Y+J6reg7J2YIOywqOydtOqwgCDsnKDsnZjrr7gg7ZWc7KeAIOyXrOu2gOulvCDtjJDri6jtlZjquLAg7JyE7ZWcIHQtdGVzdOuKlCDrj4Xrpr3tkZzrs7ggdC10ZXN0CiMjIyMg7IiY64+ZL+yekOuPmeyXkCDrlLDrpbggbXBnIOydmCDssKjsnbTqsIAg7Ya16rOE7KCB7Jy866GcIOycoOydmOuvuO2VnOqwgD8KCmF1dG9tYXRpY19jYXJzX21wZyA8LSBtdGNhcnNbbXRjYXJzJGFtPT0xLDEgXQptYW51YWxfY2Fyc19tcGcgPC0gbXRjYXJzW210Y2FycyRhbT09MCwgMV0KIyMjIyBjaGVjayBmaXJzdCA6IOuTseu2hOyCsOyEsT8KdmFyLnRlc3QoYXV0b21hdGljX2NhcnNfbXBnLCBtYW51YWxfY2Fyc19tcGcpICMgcC12YWx1ZSA9IDAuMDY2OTEgPiAwLjUgLCBzby4uIOuTseu2hOyCsCAKYGBgCgpgYGB7cn0KIyMjIyBpbmRlcGVuZGVudCB0d28gc2FtcGxlIHQtdGVzdCDsnKDtmJUgMSDrrLjrspU6IHQudGVzdChncm91cCAx7J2YIOq0gOy4oey5mCwgZ3JvdXAy7J2YIOq0gOy4oey5mCwgdC10ZXN0IOycoO2YlSwg7Iug66Kw67KU7JyEKQp0LnRlc3QoYXV0b21hdGljX2NhcnNfbXBnLCBtYW51YWxfY2Fyc19tcGcsICBwYWlyZWQgPSBGQUxTRSwgdmFyLmVxdWFsID0gVFJVRSwgY29uZi5sZXZlbCA9IDAuOTUpICAgIyBwLXZhbHVlIDogMC4wMDAyODUgPCAwLjUgCmBgYAoKYGBge3J9CiMjIyMgaW5kZXBlbmRlbnQgdHdvIHNhbXBsZSB0LXRlc3Qg7Jyg7ZiVIDIg66y467KVOiB0LnRlc3Qo6rSA7Lih7LmYfuynkeuLqCDqtazrtoQg6riw7KSALCDrjbDsnbTthLDtlITroIjsnoQsIHQtdGVzdCDsnKDtmJUsIOyLoOuisOuylOychCkKIyMjIyDsmKTthqAgLyDsiJjrj5kg7J6Q64+Z7LCo7J2YIG1wZyDssKjsnbTripQg7Ya16rOE7KCB7Jy866GcIOycoOydmOuvuO2VqCAKdC50ZXN0KG1wZyB+IGFtLCBkYXRhID0gbXRjYXJzLCB2YXIuZXF1YWwgPSBULCBjb25mLmxldmVsID0gLjk1KSAgICMgcC12YWx1ZSA6IDAuMDAwMjg1IDwgMC4wNSAKYGBgCgoKYGBge3J9CiMgMy4gUGFpcmVkIFNhbXBsZSB0ZXN0CiMjIyMg64+Z7J287ZWcIOynkeuLqOydmCDsoIQt7ZuEIOywqOydtCDruYTqtZAgCiMjIyMgZXg+IOykkeqwhOqzoOyCrCDsnbTtm4Qg6rO87Jm466W8IOuwm+ydgCDtlZnsg53snZgg7KCQ7IiYIOuzgO2ZlCAKbWlkID0gYygxNiwgMjAsIDIxLCAyMiwgMjMsIDIyLCAyNywgMjUsIDI3LCAyOCkKZmluYWwgPSBjKDE5LCAyMiwgMjQsIDI0LCAyNSwgMjUsIDI2LCAyNiwgMjgsIDMyKQp0LnRlc3QobWlkLGZpbmFsLCBwYWlyZWQ9VFJVRSkgICAjIHAtdmFsdWUgOiAwLjAwMSA8IDAuNSAKYGBgCgpgYGB7cn0KIyA0LiDri6jsnbwg7ZGc67O4IHQtdGVzdCAoIE9uZSBzYW1wbGUgdGVzdCApCiMjIyMg66y467KVOiB0LnRlc3Qo6rSA7Lih7LmYLCBhbHRlcm5hdGl2ZSA9IO2MkOuzhCDrsKntlqUsIG11Pe2Kueygleq4sOykgCwgY29uZi5sZXZlbCA9IOyLoOuisOyImOykgCkKIyMjIyDtlZnsg53rk6TsnZgg6riw66eQ6rOg7IKsIOygkOyImOqwgCAyNOygkOuztOuLpCDsnKDsnZjtlZjqsowg64aS7J2A6rCAPwp0LnRlc3QoZmluYWwsIGFsdGVybmF0aXZlID0gImdyZWF0ZXIiLCBtdSA9IDI0LCBjb25mLmxldmVsID0gLjk1KSAgIyBwLXZhbHVlIDogMC4xNjk2ID4gMC4wNSAsIHNvIOycoOydmOuvuO2VmOqyjCDrhpLsp4DripQg7JWK64ukLiAoIOyLoOuisOuPhCA5NSUg7IiY7KSAICkKdC50ZXN0KGZpbmFsLCBhbHRlcm5hdGl2ZSA9ICJncmVhdGVyIiwgbXUgPSAyMywgY29uZi5sZXZlbCA9IC45NSkgICMgcC12YWx1ZSA6IDAuMDQzMCA8IDAuMDUgLCBzbyDsnKDsnZjrr7jtlZjqsowg64aS64ukLiAoIOyLoOuisOuPhCA5NSUg7IiY7KSAICkKdC50ZXN0KGZpbmFsLCBhbHRlcm5hdGl2ZSA9ICJsZXNzIiwgbXUgPSAzMCwgY29uZi5sZXZlbCA9IC45NSkgICAgICMgcC12YWx1ZSA6IDAuMDAwNyA8IDAuMDUgLCBzbyDsnKDsnZjrr7jtlZjqsowg64Ku64ukLiAoIOyLoOuisOuPhCA5NSUg7IiY7KSAICkKYGBgCgpgYGB7cn0KbGlicmFyeShCU0RBKQp6LnRlc3QoZmluYWwsIHkgPSBOVUxMLCBhbHRlcm5hdGl2ZSA9ICJncmVhdGVyIiwgbXUgPSAyMywgc2lnbWEueCA9IHNkKGZpbmFsKSwgc2lnbWEueSA9IE5VTEwsIGNvbmYubGV2ZWwgPSAwLjk1KSAjIHAtdmFsdWUgOiAwLjAyNyAKYGBgCgpgYGB7cn0KIyDrsojsmbjtjrguIHQtc3RhdGlzdGljIGZvcm11bGEg7JmAIHQtZGlzdHJpYnV0aW9uCiMgSW5kZXBlbmRlbnQgU2FtcGxlcyBUdGVzdCA6IGh0dHA6Ly9sYXAudW1kLmVkdS9wc3ljMjAwL2hhbmRvdXRzL3BzeWMyMDBfMDgxMi5wZGYKCiMgc2FtcGxlIHNpemUgPSAxMCAsIGRmID0gOSDsnbgg65GQ6rCc7J2YIOyDmO2UjCDsp5Hri6jsnYQg7IOd6rCB7ZW067O07J6QLgpzaXplX0EgPC0gMTAKc2l6ZV9CIDwtIDEwMApzYW1wbGVfQSA8LSBybm9ybShzaXplX0EpCnNhbXBsZV9CIDwtIHJub3JtKHNpemVfQikKCiMg7JWE656YIOqzteyLneyXkCDrlLDrnbwgdC3thrXqs4Trn4nsnYAg7JWE656Y7JmAIOqwmeydtCDqs4TsgrDrkJzri6QuIApwcmludCgobWVhbihzYW1wbGVfQSktbWVhbihzYW1wbGVfQikpIC8gc3FydChzZChzYW1wbGVfQSkqKjIvc2l6ZV9BICsgc2Qoc2FtcGxlX0IpKioyL3NpemVfQikpCnByaW50KHQudGVzdChzYW1wbGVfQSxzYW1wbGVfQikkc3RhdGlzdGljKQoKIyB0LXN0YXRpc3RpYyDqsJLsnYQgZGYgPSA5IOyXkOyEnCAxMDAw7ZqMIOuwmOuzte2VtOyEnCDqtaztlbTrs7TsnpAgCnRzdGF0cyA9IHJlcGxpY2F0ZSgxMDAwLHQudGVzdChybm9ybShzaXplX0EpLHJub3JtKHNpemVfQikpJHN0YXRpc3RpYykKcmFuZ2UodHN0YXRzKQoKIyBkZiA9IDkg7JeQ7ISc7J2YIHQtZGlzdHJpYnV0aW9uIOydhCDqt7jrpqzqs6AsIDEwMDDtmowg67CY67O17ZWY7JesIOyImO2Wie2VnCB0Le2GteqzhOufieydmCBkZW5zaXR5IHBsb3Qg7J2EIOqyueyzkOuztOyekCAKcHRzID0gc2VxKC00LjUsIDQuNSxsZW5ndGg9MTAwKQpwbG90KHB0cyxkdChwdHMsIGRmPXNpemVfQStzaXplX0ItMiksIGNvbD0ncmVkJyx0eXBlPSdsJykgIyBkZiA9IGRmX29mX3NhbXBsZV9BICsgZGZfb2Zfc2FtcGxlX0IKbGluZXMoZGVuc2l0eSh0c3RhdHMpKQpgYGAKIVtdKC9Vc2Vycy9DQS90LXRlc3QtZm9ybXVsYS5qcGcpCgoKCmBgYHtyfQojIOychCDqs7zsoJXsnYQg67CY67O1IOyImO2Wie2VtOuztOuptCwgdC1zdGF0aXN0aWMg6rCS7J2YIO2ZleuloOuwgOuPhOqwgCwgVGhlIFN0dWRlbnQgdCBEaXN0cmlidXRpb24g7JeQIOq3vOyCrO2VnOuLpC4gKCDqs7Xsi53sl5Ag65Sw66W4IOqwkuydtOuLiOq5jDs7OzsgKQpwdHMgPSBzZXEoLTQuNSwgNC41LGxlbmd0aD0xMDApCnBsb3QocHRzLGR0KHB0cywgZGY9c2l6ZV9BICsgc2l6ZV9CIC0gMiksIGNvbD0ncmVkJyx0eXBlPSdsJykgIyBkZiA9IGRmX29mX3NhbXBsZV9BICsgZGZfb2Zfc2FtcGxlX0IKZm9yKGkgaW4gMDoxMCkgewogIGxpbmVzKGRlbnNpdHkocmVwbGljYXRlKDEwMDAsdC50ZXN0KHJub3JtKDEwKSxybm9ybSgxMCkpJHN0YXRpc3RpYykpKQp9CmBgYAoKCgoK