# Chi-squared-test.R

# Chi-squared test 卡方檢定
# ===================


#### Textbox 7-1   Goodness-of-fit test with equal expected frequencies ####

# Goodness-of-fit test for the case where every category is expected to be
# equally frequent.
# Alternative data source used by the original example, kept for reference:
#load("C:/example/ch7/example7_1.RData")
example7_1 <- read.csv(
  "example7_1.csv",
  fileEncoding = "big5"
)
example7_1

##   飲料類型 人數
## 1 碳酸飲料  525
## 2   礦泉水  550
## 3     果汁  470
## 4     其他  455

# Columns: 飲料類型 = beverage type, 人數 = number of people.
# No `p` argument is passed, so chisq.test() uses its default of equal
# probabilities across the categories.
chisq.test(example7_1$人數)

## 
##  Chi-squared test for given probabilities
## 
## data:  example7_1$人數
## X-squared = 12.1, df = 3, p-value = 0.007048

#### Supplement: chi-squared goodness-of-fit test for one categorical variable ####
# Read the data
example7_1 <- read.csv("example7_1.csv", fileEncoding = "big5")
example7_1

##   飲料類型 人數
## 1 碳酸飲料  525
## 2   礦泉水  550
## 3     果汁  470
## 4     其他  455

# Plot the data first to get a feel for it
# (飲料類型 = beverage type, 人數 = number of people).
# Method 1: base graphics. names.arg puts the beverage type under each bar;
# without it the bars are drawn unlabeled and cannot be told apart.
barplot(
  example7_1$人數,
  names.arg = as.character(example7_1$飲料類型)
)
# Method 2: ggplot2
library(ggplot2)

# geom_col() is appropriate here because 人數 already holds the bar heights.
ggplot(data = example7_1, aes(x = 飲料類型, y = 人數, fill = 飲料類型)) +
  geom_col()

# Analysis steps
# Step 1: Summarize the sample (mean) or build a contingency table
#   Skipped: the data already come as a frequency table.
# Step 2: Choose the significance level (0.1, 0.05, 0.01, 0.001)
#   p < 0.05
# Step 3: State the null and alternative hypotheses
#   H0: consumers show no significant preference among the beverage types
#   H1: consumers show a significant preference among the beverage types
# Step 4: Choose the test
#   Chi-squared goodness-of-fit test
# Step 5: Compute the test statistic
chisq.test(example7_1$人數)

## 
##  Chi-squared test for given probabilities
## 
## data:  example7_1$人數
## X-squared = 12.1, df = 3, p-value = 0.007048

# X-squared = 12.1, df = 3.
# Step 6: Compute the p-value
#   p-value = 0.007048
# Step 7: Make the decision
#   Because the p-value (0.007048) < 0.05, reject the null hypothesis in favor
#   of the alternative: consumers show a significant preference among the
#   beverage types.






#### Textbox 7-2   Goodness-of-fit test with unequal expected frequencies ####

# Goodness-of-fit test for the case where the expected frequencies differ
# between categories.
# Alternative data source used by the original example, kept for reference:
#load("C:/example/ch7/example7_2.RData")
example7_2 <- read.csv(
  "example7_2.csv",
  fileEncoding = "big5"
)
example7_2

##   受教育程度 離婚家庭數 期望比例
## 1 小學及以下         30     0.20
## 2       初中        110     0.35
## 3       高中         80     0.25
## 4       大學         25     0.12
## 5     研究生         15     0.08

# Columns: 受教育程度 = education level, 離婚家庭數 = number of divorced
# households (observed counts), 期望比例 = expected proportion.
# The observed counts are tested against the expected proportions via `p`.
chisq.test(
  example7_2$離婚家庭數,
  p = example7_2$期望比例
)

## 
##  Chi-squared test for given probabilities
## 
## data:  example7_2$離婚家庭數
## X-squared = 19.586, df = 4, p-value = 0.0006028

#### Textbox 7-3   Contingency-table test of independence ####

# Test of independence on a contingency table.
# Alternative data source used by the original example, kept for reference:
#load("C:/example/ch7/example7_3.RData")
example7_3 <- read.csv(
  "example7_3.csv",
  fileEncoding = "big5"
)
head(example7_3)

##   滿意度 地區
## 1   滿意 北部
## 2   滿意 北部
## 3   滿意 北部
## 4   滿意 北部
## 5   滿意 北部
## 6   滿意 北部

# Cross-tabulate the data frame's columns (滿意度 = satisfaction,
# 地區 = region) and print the resulting table.
count <- table(example7_3)
count

##         地區
## 滿意度   中部 北部 南部
##   不滿意   82   34   65
##   滿意    158  126   35

# Run the chi-squared test on the two-way table.
chisq.test(count)

## 
##  Pearson's Chi-squared test
## 
## data:  count
## X-squared = 51.827, df = 2, p-value = 5.572e-12

#### Supplement: chi-squared test of independence on a contingency table ####
# Read the data (one row per respondent; 滿意度 = satisfaction, 地區 = region)
example7_3 <- read.csv("example7_3.csv", fileEncoding = "big5")
head(example7_3, 10)

##    滿意度 地區
## 1    滿意 北部
## 2    滿意 北部
## 3    滿意 北部
## 4    滿意 北部
## 5    滿意 北部
## 6    滿意 北部
## 7    滿意 北部
## 8    滿意 北部
## 9    滿意 北部
## 10   滿意 北部

# Plot the data first to get a feel for it
# Method 1: base graphics on the cross-tabulated counts
count <- table(example7_3$滿意度, example7_3$地區)
count

##         
##          中部 北部 南部
##   不滿意   82   34   65
##   滿意    158  126   35

# legend.text = TRUE labels the stacked segments with the table's row names,
# so the satisfaction levels can be told apart in the plot.
barplot(count, legend.text = TRUE)

# Method 2: ggplot2
library(ggplot2)
# The data hold one row per respondent, so the bar heights must be row counts:
# geom_bar() counts rows per region and stacks them by satisfaction level.
# (geom_col() would need a numeric y column, which this data set lacks.)
ggplot(data = example7_3, aes(x = 地區, fill = 滿意度)) +
  geom_bar()

# Analysis steps
# Step 1: Summarize the sample (mean) or build a contingency table
count <- table(example7_3$滿意度, example7_3$地區)
count

##         
##          中部 北部 南部
##   不滿意   82   34   65
##   滿意    158  126   35

# Step 2: Choose the significance level (0.1, 0.05, 0.01, 0.001)
#   p < 0.05
# Step 3: State the null and alternative hypotheses
#   H0: online-shopping customer satisfaction does not differ significantly
#       across regions
#   H1: online-shopping customer satisfaction differs significantly across
#       regions
# Step 4: Choose the test
#   Chi-squared test of independence
# Step 5: Compute the test statistic
chisq.test(count)

## 
##  Pearson's Chi-squared test
## 
## data:  count
## X-squared = 51.827, df = 2, p-value = 5.572e-12

# X-squared = 51.827, df = 2.
# Step 6: Compute the p-value
#   p-value = 5.572e-12
# Step 7: Make the decision
#   Because the p-value (5.572e-12) < 0.05, reject the null hypothesis in favor
#   of the alternative: online-shopping customer satisfaction differs
#   significantly across regions.

# Chi-squared-test.R

# user

# 2021-04-14