독립표본(Independent Sample) t-test

양측검정(two-sided test): alternative = c("two.sided") — 귀무가설: 두 집단의 평균은 같다 (유의수준 5%, 정규근사 기준 임계값 ±1.96)

01. 데이터 불러오기

# Load the tire data set from CSV into the data frame `ist`.
ist <- read.csv(
  file = "C:/Users/wun99/Desktop/Summer Study/R통계분석-심흥섭/RStat/Ch07.평균차이검정(t-test)/data/02.IST.csv",
  header = TRUE,    # the first row holds the column names
  na.strings = "."  # every "." entry is read as NA
)

# Inspect the structure. t_group is a categorical variable stored as integer
# codes, so it has to be converted to a factor to be treated as categorical.
str(object = ist)

## 'data.frame':    60 obs. of  2 variables:
##  $ t_group: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ t_time : int  48187 47245 51020 50732 52416 49278 38214 46742 48706 54280 ...

# Put the columns of `ist` on the search path so that t_time and t_group can
# be referenced by bare name; undone by the matching detach(ist) call later.
attach(ist) # attach the object (counterpart: detach)

02. 기본통계치 확인: summary or describe(psych패키지)

# Five-number summary plus mean of t_time, computed separately per t_group.
with(ist, tapply(X = t_time, INDEX = t_group, FUN = summary))

## $`1`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   38214   46590   49047   48671   50948   55750 
## 
## $`2`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   41852   48928   51396   51378   53310   59299

library(psych)
# Descriptive statistics of t_time per t_group; mat = TRUE returns one
# matrix-style table so the groups can be compared row by row.
describeBy(x = t_time, group = t_group, mat = TRUE)

##     item group1 vars  n     mean       sd  median  trimmed      mad   min
## X11    1      1    1 30 48670.57 3607.118 49047.0 48864.88 3274.322 38214
## X12    2      2    1 30 51377.60 4197.600 51395.5 51459.54 3640.524 41852
##       max range        skew   kurtosis       se
## X11 55750 17536 -0.61983820  0.6818221 658.5667
## X12 59299 17447 -0.08715409 -0.5159939 766.3734

03. 그래프 그리기(박스그래프,히스토그램)

# Draw a box plot of t_time by group on the top row and one histogram per
# group on the bottom row, then restore the previous graphics settings.
opar <- par(no.readonly = TRUE)  # snapshot of the settable graphics parameters
# Panel 1 spans the whole top row; panels 2 and 3 share the bottom row.
layout(matrix(c(1, 1, 2, 3), 2, 2, byrow = TRUE))
boxplot(t_time ~ t_group, data = ist)
# t_group holds the integer codes 1 and 2 (see the str(ist) output), so the
# groups are selected by code. Comparing against the label strings selects no
# rows, which makes hist() fail and leaves the reserved panels empty.
hist(ist$t_time[ist$t_group == 1], main = "정연타이어", xlab = "t_time")
hist(ist$t_time[ist$t_group == 2], main = "동규타이어", xlab = "t_time")
par(opar)

04. 통계분석

####등분산 검정->집단이 두개이므로 해야함

# F test for equality of the two group variances. The p-value is above 0.05,
# so equal variances are assumed.
var.test(t_time ~ t_group, data = ist)

## 
##  F test to compare two variances
## 
## data:  t_time by t_group
## F = 0.73845, num df = 29, denom df = 29, p-value = 0.4192
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3514742 1.5514716
## sample estimates:
## ratio of variances 
##          0.7384458

# Two-sided independent-samples t-test of t_time between the two groups.
t.test(
  t_time ~ t_group,
  data = ist,
  alternative = "two.sided",
  var.equal = TRUE,  # TRUE for equal variances, FALSE for unequal variances
  conf.level = 0.95
)

## 
##  Two Sample t-test
## 
## data:  t_time by t_group
## t = -2.679, df = 58, p-value = 0.009593
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4729.6975  -684.3692
## sample estimates:
## mean in group 1 mean in group 2 
##        48670.57        51377.60

# Remove `ist` from the search path again (counterpart of the earlier attach(ist)).
detach(ist)

05. 통계결과 그래프

# Plot the normal curves N(mean, se) of the two group means on one set of axes.
# The means and standard errors are copied from the describeBy()/t.test()
# output: group 1 = 정연타이어, group 2 = 동규타이어.
group_mean <- c("1" = 48670.57, "2" = 51377.60)
group_se <- c("1" = 658.5667, "2" = 766.3734)

# Group 1 (정연타이어). The reported mean is 48670.57; the previous value
# 48760.57 had two digits transposed and shifted the curve by 90.
x <- group_mean[["1"]]
se <- group_se[["1"]]
data <- sort(rnorm(1000, x, se))  # sorted draws serve as the x grid of the curve
# NOTE(review): the title reads "자통차"; "자동차" was probably intended.
# The string is left unchanged here.
plot(data, dnorm(data, x, se),
     col = "blue", type = "l", main = "자통차회사별 타이어수명",
     xlim = c(45000, 55000),
     ylim = c(0, 0.0006))  # upper limit is close to the peak density of the curves
abline(v = x, col = "red", lty = 3)  # v = draws a vertical line at the mean

# Group 2 (동규타이어). lines() adds the curve to the existing plot, so the
# axes and labels are not drawn a second time as with par(new = TRUE).
x <- group_mean[["2"]]
se <- group_se[["2"]]
data <- sort(rnorm(1000, x, se))
lines(data, dnorm(data, x, se), col = "red")
abline(v = x, col = "red", lty = 3)