# t-test.R

####母體平均數的檢定####
#判斷時機一：一組樣本或兩組樣本
#一組樣本-->單一樣本t檢定(one-sample t-test)
#兩組樣本-->獨立樣本t檢定(independent samples t-test) 或是相依(配對)樣本t檢定( paired sample t-test )

#判斷時機二:從問題意識判斷檢定的方向性
#當對立假設A!=B，判斷為雙尾檢定:t.test(...,alternative="two.sided")
#當對立假設A>B，判斷為右尾檢定:t.test(...,alternative="greater")
#當對立假設A<B，判斷為左尾檢定:t.test(...,alternative="less")

#判斷時機三:大樣本或小樣本
#若樣本數低於30，要先做常態性檢定shapiro.test()，樣本分佈符合常態分配假設始可使用t檢定，反之則需使用無母數的檢定方法

#判斷時機四：兩組樣本的t檢定，需先檢定兩組樣本變異數是否相等:var.test(依變數~自變數)
#若變異數相等: t.test(...,var.equal = TRUE)
#若變異數不相等: t.test(...,var.equal = FALSE)



####修正：文字框6-2 單一t檢定(大樣本)####

#用t.test()函數
# Load the Big5-encoded PM2.5 sample from the working directory.
example6_4 <- read.csv(file = "example6_4.csv", fileEncoding = "big5")

# Look at the data before testing: a histogram, then a horizontal box plot.
hist(x = example6_4$PM2.5值)
boxplot(x = example6_4$PM2.5值, horizontal = TRUE)

# Analysis steps
# Step 1: describe the sample (mean and quartiles) or build a cross table.
summary(object = example6_4$PM2.5值)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   58.30   72.22   76.70   78.12   85.58   96.60

# Step2：決定顯著水準( 0.1, 0.05, 0.01, 0.001)
# p<0.05
# Step3：寫出虛無假設與對立假設
# H0:μ >= 82。空氣中PM2.5平均大於等於82
# H1:μ < 82。空氣中PM2.5平均小於82
# Step4：決定檢定方法
# 單一樣本t檢定
# Step5：計算檢定值。
# One-sample t-test of the PM2.5 mean against 82;
# alternative = "less" requests the left-tailed test (H1: mean < 82).
t.test(
  x = example6_4$PM2.5值,
  mu = 82,
  alternative = "less"
)

## 
##  One Sample t-test
## 
## data:  example6_4$PM2.5值
## t = -2.3868, df = 31, p-value = 0.01164
## alternative hypothesis: true mean is less than 82
## 95 percent confidence interval:
##      -Inf 80.87764
## sample estimates:
## mean of x 
##    78.125

# t = -2.3868, df = 31。
# Step6：計算p值
# p-value = 0.01164
# Step7：下決策
# 因為p-value (0.01164)<0.05，所以拒絕虛無假設，對立假設成立：空氣中PM2.5平均小於82。 



####修正:文字框6-3   單一樣本t檢定(小樣本)####
#單一樣本t檢定（使用t.test(x,mu=m)函數）
# Load the Big5-encoded sample for this example.
example6_5 <- read.csv(file = "example6_5.csv", fileEncoding = "BIG5")

# Work around garbled Chinese column names by renaming the column to English
# (not needed when plots already render the original name correctly).
# Show the current column name first; recorded output follows.
colnames(example6_5)

## [1] "建材長度"

colnames(example6_5) <- "Length"

# Look at the data before testing: a histogram, then a horizontal box plot.
hist(x = example6_5$Length)
boxplot(x = example6_5$Length, horizontal = TRUE)

# Analysis steps
# Step 1: describe the sample (mean and quartiles) or build a cross table.
summary(object = example6_5$Length)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   13.80   14.82   15.00   14.89   15.20   15.40

# Step2：決定顯著水準( 0.1, 0.05, 0.01, 0.001)
# p<0.05
# Step3：寫出虛無假設與對立假設
# H0:μ = 15。建材長度等於15cm
# H1:μ != 15。建材長度不等於15cm
# Step4：決定檢定方法
# The sample is small, so run a normality check first
# (not needed when the sample size is above 30).
# Normality check: shapiro.test().
# Recorded result: p-value = 0.06992 > 0.05, so the sample is treated as
# consistent with the normality assumption.
shapiro.test(x = example6_5$Length)

## 
##  Shapiro-Wilk normality test
## 
## data:  example6_5$Length
## W = 0.85679, p-value = 0.06992

# Test method: one-sample t-test (a single small sample whose distribution
# is consistent with the normality assumption).
# Step 5: compute the test statistic.
t.test(
  x = example6_5$Length,
  mu = 15,
  alternative = "two.sided"
)

## 
##  One Sample t-test
## 
## data:  example6_5$Length
## t = -0.70533, df = 9, p-value = 0.4985
## alternative hypothesis: true mean is not equal to 15
## 95 percent confidence interval:
##  14.5372 15.2428
## sample estimates:
## mean of x 
##     14.89

# Note: alternative = "two.sided" is the two-tailed test and is the default
# for t.test(), so the argument can be left out.
t.test(x = example6_5$Length, mu = 15)

## 
##  One Sample t-test
## 
## data:  example6_5$Length
## t = -0.70533, df = 9, p-value = 0.4985
## alternative hypothesis: true mean is not equal to 15
## 95 percent confidence interval:
##  14.5372 15.2428
## sample estimates:
## mean of x 
##     14.89

# t = -0.70533, df = 9
# Step6：計算p值
# p-value = 0.4985
# Step7：下決策
# 因為p-value (0.4985)>0.05，所以無法拒絕虛無假設：沒有足夠證據顯示建材長度不等於15cm。

####獨立樣本T檢定(大樣本)####
#不同性別的每週看紙本報紙時間是否有顯著差異？
#a1 A1.性別
#c1a    C1a.請問你每週平均有幾天會看紙本報紙?

# Prepare the data: restore the saved workspace objects, then inspect types.
load("tcs2019.RData")

# a1 (gender) must be a factor for the grouped analysis.
str(object = tcs2019$a1)

##  'haven_labelled' num [1:2000] 2 1 1 2 1 1 1 1 2 2 ...
##  - attr(*, "label")= chr "A1.性別"
##  - attr(*, "format.spss")= chr "F8.0"
##  - attr(*, "labels")= Named num [1:2] 1 2
##   ..- attr(*, "names")= chr [1:2] "男" "女"

# c1a (days per week reading print newspapers) must be numeric.
summary(object = tcs2019$c1a)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   1.403   1.125   7.000

# Confirm the storage type of c1a.
str(object = tcs2019$c1a)

##  num [1:2000] 0 5 3 0 5 0 2 7 0.5 0 ...
##  - attr(*, "label")= chr "C1a.請問你每週平均有幾天會看紙本報紙?"
##  - attr(*, "format.spss")= chr "F8.1"

# Convert a1 to a factor whose levels are labelled "男" (male) and "女" (female).
tcs2019$a1 <- factor(
  x = tcs2019$a1,
  labels = c("男", "女")
)

# Verify the conversion.
str(object = tcs2019$a1)

##  Factor w/ 2 levels "男","女": 2 1 1 2 1 1 1 1 2 2 ...

# Compare the two groups with box plots before testing.
# Option 1 - grouped box plot via plot(categorical, continuous).
plot(x = tcs2019$a1, y = tcs2019$c1a)

# Option 2 - grouped box plot via boxplot(continuous ~ categorical).
boxplot(tcs2019$c1a ~ tcs2019$a1)

# Analysis steps
# Step 1: describe the sample (group counts here) or build a cross table.
summary(object = tcs2019$a1)

##   男   女 
##  883 1117

# Overall distribution of c1a; recorded output follows.
summary(object = tcs2019$c1a)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   1.403   1.125   7.000

# Split by group, then compute:
# tapply(continuous column to analyse, categorical grouping column, function).
tapply(X = tcs2019$c1a, INDEX = tcs2019$a1, FUN = mean)

##       男       女 
## 1.714043 1.157565

#男性每週看紙本報紙天數為1.714043天，女性每週看紙本報紙天數為1.157565天
# Step2：決定顯著水準( 0.1, 0.05, 0.01, 0.001)
# p<0.05
# Step3：寫出虛無假設與對立假設
# H0:μ男 = μ女。不同性別的每週看紙本報紙時間"沒有顯著差異"。
# H1:μ男!= μ女。不同性別的每週看紙本報紙時間"有顯著差異"。
# Step4：決定檢定方法
# 獨立樣本T檢定
# Step5：計算檢定值。
# First test whether the two groups have equal variances:
# var.test(dependent variable ~ independent variable).
# Recorded result: p-value = 1.065e-07 < 0.05, so the variances are
# treated as unequal.
var.test(tcs2019$c1a ~ tcs2019$a1)

## 
##  F test to compare two variances
## 
## data:  tcs2019$c1a by tcs2019$a1
## F = 1.401, num df = 882, denom df = 1116, p-value = 1.065e-07
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  1.236984 1.588171
## sample estimates:
## ratio of variances 
##           1.400972

# Independent-samples t-test: use var.equal = TRUE when the variances are
# equal and var.equal = FALSE otherwise.
# Form: t.test(dependent variable ~ independent variable, var.equal = FALSE).
t.test(
  tcs2019$c1a ~ tcs2019$a1,
  var.equal = FALSE
)

## 
##  Welch Two Sample t-test
## 
## data:  tcs2019$c1a by tcs2019$a1
## t = 4.8101, df = 1724.3, p-value = 1.64e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.3295725 0.7833838
## sample estimates:
## mean in group 男 mean in group 女 
##         1.714043         1.157565

#檢定值t = 4.8101, df = 1724.3。
# Step6：計算p值
#p-value = 1.64e-06
# Step7：下決策
#因為(p-value = 1.64e-06)<0.05，所以拒絕虛無假設，對立假設成立：不同性別的每週看紙本報紙時間"有顯著差異"。


####修正:文字框6-5   獨立小樣本的t檢定####
###前置作業
# Load the Big5-encoded two-sample data for this example.
example6_7 <- read.csv("example6_7.csv", fileEncoding = "BIG5")

# Work around garbled Chinese column names by renaming the columns to English
# (not needed when plots already render the original names correctly).
colnames(example6_7) <- c("A", "B")

# Plot the data first: one box per sample, side by side.
# The two columns are passed explicitly (the earlier call supplied the whole
# data frame plus column B again, which hid the intended A-versus-B
# comparison); `names` keeps the axis labels "A" and "B".
boxplot(example6_7$A, example6_7$B, names = c("A", "B"))

###撰寫分析步驟
# Step1：分析樣本平均數或建立交叉表
# Descriptive statistics for sample A; recorded output follows.
summary(object = example6_7$A)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   19.82   20.00   19.95   20.10   20.50

# Descriptive statistics for sample B.
summary(object = example6_7$B)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.50   19.80   20.10   20.10   20.35   20.80

# Step2：決定顯著水準( 0.1, 0.05, 0.01, 0.001)
# p<0.05
# Step3：寫出虛無假設與對立假設
# H0:機具甲= 機具乙。機具甲和機具乙的零件直徑"沒有"顯著差異。
# H1:機具甲!= 機具乙。機具甲和機具乙的零件直徑"有"顯著差異。
# Step4：決定檢定方法
# The samples are small, so run a normality check first
# (not needed when the sample size is above 30).
# Recorded result for A: p-value = 0.2235, consistent with normality.
shapiro.test(x = example6_7$A)

## 
##  Shapiro-Wilk normality test
## 
## data:  example6_7$A
## W = 0.90079, p-value = 0.2235

# Recorded result for B: p-value = 0.6751, consistent with normality.
shapiro.test(x = example6_7$B)

## 
##  Shapiro-Wilk normality test
## 
## data:  example6_7$B
## W = 0.95056, p-value = 0.6751

# 檢定方法：獨立樣本t檢定(因為是兩組獨立樣本，且樣本分佈符合常態分配假設)
# Step5：計算檢定值。
# 5-1. Test the two samples for equal variances first
# (required for both large and small samples).
# Recorded result: p-value = 0.8493 > 0.05, so equal variances are assumed.
var.test(x = example6_7$A, y = example6_7$B)

## 
##  F test to compare two variances
## 
## data:  example6_7$A and example6_7$B
## F = 0.87784, num df = 9, denom df = 9, p-value = 0.8493
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.2180433 3.5341824
## sample estimates:
## ratio of variances 
##          0.8778409

# 5-2. Only then run the t-test: alternative = "two.sided" is the two-tailed
# test and var.equal = TRUE assumes equal variances.
t.test(
  x = example6_7$A,
  y = example6_7$B,
  alternative = "two.sided",
  var.equal = TRUE
)

## 
##  Two Sample t-test
## 
## data:  example6_7$A and example6_7$B
## t = -0.78276, df = 18, p-value = 0.4439
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.5526006  0.2526006
## sample estimates:
## mean of x mean of y 
##     19.95     20.10

#t = -0.78276, df = 18
# Step6：計算p值
# p-value = 0.4439
# Step7：下決策
# 因為p-value (0.4439)>0.05，所以無法拒絕虛無假設：機具甲和機具乙的零件直徑"沒有"顯著差異。

# t-test.R

# user

# 2021-04-14