#1110730305
#假設檢定測驗
#請執行以下程式碼:
#隨機抽取某校60位同學,調查成績如下:
# Simulated sample: 60 students drawn at random from one school.
student.id <- paste0("student.", 1:60)
# Fix the seed so every run reproduces the same simulated scores.
set.seed(123)
# Gender: drawn with replacement from a 3:1 male/female pool.
gender <- sample(
  rep(c("male", "female"), times = c(3, 1)),
  size = length(student.id),
  replace = TRUE
)
# Department: one of four codes, drawn uniformly with replacement.
department <- sample(
  c("J", "P", "A", "S"),
  size = length(student.id),
  replace = TRUE
)
# Media-literacy score: normal draws (mean 65, sd 10) rounded to whole points.
media <- round(rnorm(n = length(student.id), mean = 65, sd = 10))
# English score: whole numbers from 1 to 100, drawn uniformly with replacement.
english <- sample.int(100L, size = length(student.id), replace = TRUE)
# Assemble the score sheet; column names match the vectors above.
grade <- data.frame(
  student.id = student.id,
  gender = gender,
  department = department,
  media = media,
  english = english
)
# Q1. The mean media-literacy score on the previous exam was 62. Did this exam
#     show a significant improvement? (one-sample test)
# Look at the shape of the score distribution first.
hist(grade$media)

boxplot(grade$media, horizontal = TRUE)

# Step 1: describe the sample.
summary(media)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 42.00 59.00 64.00 64.67 69.25 87.00
mean(grade$media)
## [1] 64.66667
# Step 2: choose the significance level (0.1, 0.05, 0.01, 0.001).
# alpha = 0.05
# Step 3: state the hypotheses.
# H0: mu <= 62. The mean media-literacy score is at most 62.
# H1: mu >  62. The mean media-literacy score is above 62.
# Step 4: choose the test.
# One-sample t test.
# Step 5: compute the test statistic.
# alternative = "greater" requests the right-tailed test that matches H1.
t.test(x = media, alternative = "greater", mu = 62)
##
## One Sample t-test
##
## data: media
## t = 2.3482, df = 59, p-value = 0.01112
## alternative hypothesis: true mean is greater than 62
## 95 percent confidence interval:
## 62.76893 Inf
## sample estimates:
## mean of x
## 64.66667
# t = 2.3482, df = 59. df is the degrees of freedom (n - 1) and must be reported.
# Step 6: p-value.
# p-value = 0.01112
# Step 7: decision.
# Since the p-value (0.01112) < 0.05, reject H0 in favour of H1: this exam
# shows a significant improvement.
# Q2. Do English scores differ significantly by gender? (independent samples)
str(grade$gender) # character at this point; the grouping variable must be a factor
## chr [1:60] "male" "male" "male" "male" "male" "male" "male" "male" "male" ...
summary(grade$english) # the outcome must be numeric
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 23.00 36.50 44.07 72.25 94.00
str(grade$english)
## int [1:60] 14 16 87 33 40 40 10 89 72 82 ...
# Convert gender to a factor. The levels are spelled out on purpose: factor()
# sorts levels alphabetically ("female", "male") by default, so passing only
# labels = c("男", "女") would label the female students "男" and the male
# students "女".
grade$gender <- factor(
  grade$gender,
  levels = c("male", "female"),
  labels = c("男", "女")
)
str(grade$gender)
## Factor w/ 2 levels "男","女": 1 1 1 1 1 1 1 1 1 1 ...
# Compare the two groups with a grouped box plot: plot(factor, numeric).
plot(grade$gender, grade$english)
# Step 1: describe the sample.
summary(grade$gender)
## 男 女
## 51 9
# Group means: tapply(numeric column, grouping column, function).
tapply(grade$english, grade$gender, mean)
## 男 女
## 46.41176 30.77778
# Step 2: choose the significance level (0.1, 0.05, 0.01, 0.001).
alpha <- 0.05
# Step 3: state the hypotheses.
# H0: mu_male == mu_female. English scores do not differ by gender.
# H1: mu_male != mu_female. English scores differ by gender.
# Step 4: choose the test.
# Independent two-sample t test.
# Step 5: compute the test statistic.
# First test whether the two group variances are equal:
# var.test(outcome ~ group).
var_check <- var.test(english ~ gender, data = grade)
var_check
# The F test gives p-value = 0.04522 < 0.05, so equal variances are REJECTED.
# (The p-value of this two-sided F test does not depend on group order.)
# Let the F test pick the t-test variant instead of hard-coding it:
# var.equal = TRUE is the pooled test, var.equal = FALSE is Welch's test.
equal_var <- var_check$p.value >= alpha
gender_test <- t.test(
  english ~ gender,
  data = grade,
  var.equal = equal_var,
  alternative = "two.sided"
)
gender_test
# NOTE: the figures recorded from the earlier pooled run (t = -1.4973,
# df = 58, p-value = 0.1397) assumed equal variances and no longer apply;
# read t, df and the p-value from the output printed above.
# Step 6: p-value.
gender_test$p.value
# Step 7: decision. TRUE means reject H0 (scores differ by gender);
# FALSE means H0 cannot be rejected.
gender_test$p.value < alpha

# Q3. Do media-literacy scores differ significantly between departments?
summary(grade$department)
## Length Class Mode
## 60 character character
str(grade$department)
## chr [1:60] "J" "S" "S" "A" "J" "P" "J" "J" "A" "J" "P" "J" "A" "J" "A" "P" ...
summary(grade$media)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 42.00 59.00 64.00 64.67 69.25 87.00
str(grade$media)
## num [1:60] 69 60 62 55 54 68 69 66 74 86 ...
# Step 1: describe the sample, group by group.
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
str(grade)
## 'data.frame': 60 obs. of 5 variables:
## $ student.id: chr "student.1" "student.2" "student.3" "student.4" ...
## $ gender : Factor w/ 2 levels "男","女": 1 1 1 1 1 1 1 1 1 1 ...
## $ department: chr "J" "S" "S" "A" ...
## $ media : num 69 60 62 55 54 68 69 66 74 86 ...
## $ english : int 14 16 87 33 40 40 10 89 72 82 ...
# Refer to columns by bare name inside group_by()/summarise(). Writing
# grade$media there reads the whole ungrouped column, so every department
# would be given the overall mean and standard deviation.
mytable <- grade %>%
  group_by(department) %>%
  summarise(次數 = n(), 平均數 = mean(media), 標準差 = sd(media))
mytable
# Step 2: choose the significance level (0.1, 0.05, 0.01, 0.001).
# alpha = 0.05
# Step 3: state the hypotheses.
# H0: media-literacy scores do not differ between departments.
# H1: media-literacy scores differ between departments.
# Step 4: choose the test.
# One-way analysis of variance (one-way ANOVA).
# Step 5: compute the test statistic.
model_1w <- aov(grade$media ~ grade$department)
summary(model_1w)
## Df Sum Sq Mean Sq F value Pr(>F)
## grade$department 3 213 70.97 0.913 0.441
## Residuals 56 4352 77.72
# Test statistic: F = 0.913
# Step 6: p-value.
# p-value = 0.441
# Step 7: decision.
# Since the p-value (0.441) > 0.05, H0 cannot be rejected: media-literacy
# scores do not differ significantly between departments.
# Step 8: H0 was not rejected, so no post-hoc comparison is needed.

# Q4. Are the male students' English scores lower than the female students'?
# grade$gender is already a factor with levels 男 (male), 女 (female) from Q2,
# so it is not converted again, and the Q2 box plot already shows both groups.
# Step 1: describe the sample.
tapply(grade$english, grade$gender, mean)
## 男 女
## 46.41176 30.77778
# Step 2: choose the significance level (0.1, 0.05, 0.01, 0.001).
# alpha = 0.05
# Step 3: state the hypotheses. The claim under test goes in H1.
# H0: mu_male >= mu_female. Male scores are not lower than female scores.
# H1: mu_male <  mu_female. Male scores are lower than female scores.
# Step 4: choose the test.
# Independent two-sample t test, left-tailed.
# Step 5: compute the test statistic.
# The variance check is the same F test as in Q2 (p-value = 0.04522 < 0.05,
# variances unequal), so equal_var is reused. With the formula interface the
# difference is (first level - second level) = male - female, hence
# alternative = "less" matches H1.
lower_test <- t.test(
  english ~ gender,
  data = grade,
  var.equal = equal_var,
  alternative = "less"
)
lower_test
# Step 6: p-value.
lower_test$p.value
# Step 7: decision. TRUE means reject H0; FALSE means H0 cannot be rejected.
lower_test$p.value < alpha
# The male sample mean (46.41) is above the female one (30.78), so the
# left-tailed p-value is larger than 0.5 and H0 cannot be rejected: there is
# no evidence that male students score lower than female students.
# Q5. Is the male/female ratio at this school significantly unequal?
#     (chi-squared test)
# Step 1: tabulate the observed counts and show them.
x <- table(gender)
x
## gender
## female male
## 9 51
# Step 2: choose the significance level.
# alpha = 0.05
# Step 3: state the hypotheses.
# H0: the proportions of male and female students do not differ.
# H1: the proportions of male and female students differ.
# Step 4: choose the test.
# Chi-squared goodness-of-fit test.
# Step 5: compute the test statistic.
# With a one-way table and no `p` argument, chisq.test() tests against equal
# probabilities for all categories.
chisq.test(x)
##
## Chi-squared test for given probabilities
##
## data: x
## X-squared = 29.4, df = 1, p-value = 5.888e-08
# X-squared = 29.4, df = 1.
# Step 6: p-value.
# p-value = 5.888e-08
# Step 7: decision.
# Since the p-value (5.888e-08) < 0.05, reject H0 in favour of H1: the
# proportions of male and female students differ significantly.
# Q6. Does the male/female ratio differ between departments?
#     (chi-squared test of independence)
# Step 1: cross-tabulate department by gender.
y <- table(department, gender)
y
## gender
## department female male
## A 2 12
## J 0 15
## P 4 13
## S 3 11
# Step 2: choose the significance level.
# alpha = 0.05
# Step 3: state the hypotheses.
# H0: the male/female ratio does not differ between departments.
# H1: the male/female ratio differs between departments.
# Step 4: choose the test.
# Chi-squared test of independence, cross-checked with Fisher's exact test.
# Step 5: compute the test statistic.
chisq.test(y)
## Warning in chisq.test(y): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: y
## X-squared = 4.0765, df = 3, p-value = 0.2533
# X-squared = 4.0765, df = 3.
# The warning must not be ignored: with only 9 female students in total, the
# expected female counts are 9 * 14 / 60 = 2.1 (A, S), 2.25 (J) and 2.55 (P),
# all below the usual minimum of 5, so the chi-squared approximation is
# unreliable here. Fisher's exact test does not rely on that approximation.
fisher.test(y)
# Step 6: p-value.
# Pearson p-value = 0.2533; report the p-value printed by fisher.test(y) as the
# primary result.
# Step 7: decision.
# By the Pearson test, the p-value (0.2533) > 0.05, so H0 cannot be rejected:
# the male/female ratio does not differ between departments. Confirm that the
# exact test's p-value leads to the same decision before reporting it.