#1110730305
#假設檢定測驗
#請執行以下程式碼:
#隨機抽取某校60位同學,調查成績如下:
# Simulated survey data: grades for 60 randomly drawn students.
# The seed is fixed so every run reproduces the same sample. The order of the
# random draws below (gender, department, media, english) must stay as it is,
# otherwise the generated data change.
set.seed(123)
student.id <- paste0("student.", seq_len(60))
n_students <- length(student.id)

# Gender: sampled from a pool holding three "male" entries per "female" entry.
gender_pool <- rep(c("male", "female"), times = c(3, 1))
gender <- sample(gender_pool, n_students, replace = TRUE)

# Department code: sampled from four options.
department <- sample(c("J", "P", "A", "S"), n_students, replace = TRUE)

# Media-literacy score: normal draw (mean 65, sd 10) rounded to a whole number.
media <- round(rnorm(n_students, mean = 65, sd = 10))

# English score: integer sampled from 1..100.
english <- sample(seq_len(100), n_students, replace = TRUE)

# Assemble the grade table; column names are taken from the variable names.
grade <- data.frame(student.id, gender, department, media, english)

# Q1. The previous exam's mean media-literacy score was 62. Did this exam show
#     a significant improvement? (one-sample test)

# Inspect the distribution of the scores first.
hist(grade$media)

boxplot(grade$media, horizontal = TRUE)

# Step 1: describe the sample (mean, or a cross table for categorical data).

summary(media)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   42.00   59.00   64.00   64.67   69.25   87.00
mean(grade$media)
## [1] 64.66667

# Step 2: choose the significance level (0.1, 0.05, 0.01, 0.001).
#   alpha = 0.05

# Step 3: state the null and alternative hypotheses.
#   H0: mu <= 62. The mean media-literacy score is at most 62.
#   H1: mu >  62. The mean media-literacy score is above 62.

# Step 4: choose the test.
#   One-sample t-test.

# Step 5: compute the test statistic.
# alternative = "greater" requests the right-tailed test that matches H1.
t.test(media, alternative = "greater", mu = 62)
## 
##  One Sample t-test
## 
## data:  media
## t = 2.3482, df = 59, p-value = 0.01112
## alternative hypothesis: true mean is greater than 62
## 95 percent confidence interval:
##  62.76893      Inf
## sample estimates:
## mean of x 
##  64.66667
# t = 2.3482, df = 59. df is the degrees of freedom (n - 1); always report it.

# Step 6: read off the p-value.
#   p-value = 0.01112

# Step 7: decision.
#   p-value (0.01112) < 0.05, so H0 is rejected in favour of H1:
#   this exam shows a significant improvement over the previous mean of 62.

# Q2. Do English scores differ significantly between genders?
#     (independent-samples comparison)

str(grade$gender)  # the grouping variable has to be a factor
##  chr [1:60] "male" "male" "male" "male" "male" "male" "male" "male" "male" ...
summary(grade$english)  # the outcome has to be numeric
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   23.00   36.50   44.07   72.25   94.00
str(grade$english)
##  int [1:60] 14 16 87 33 40 40 10 89 72 82 ...

# Convert gender to a factor.
# factor() orders levels alphabetically ("female" before "male"), so passing
# only `labels = c("男", "女")` attaches "男" (male) to the female students.
# Naming the levels explicitly keeps every label on the right group. The
# factor is built from the raw `gender` vector so re-running this section
# never relabels an already-converted column.
grade$gender <- factor(
  gender,
  levels = c("male", "female"),
  labels = c("男", "女")
)

str(grade$gender)
##  Factor w/ 2 levels "男","女": 1 1 1 1 1 1 1 1 1 1 ...

# Grouped box plot: plot(categorical variable, continuous variable).
plot(grade$gender, grade$english)

# Analysis steps

# Step 1: describe the sample (group sizes and group means).

summary(grade$gender)
## 男 女 
## 51  9
summary(grade$english)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   23.00   36.50   44.07   72.25   94.00

# Split by group, then aggregate:
# tapply(continuous column, grouping column, summary function)
tapply(grade$english, grade$gender, mean)
##       男       女 
## 46.41176 30.77778

# Step 2: choose the significance level (0.1, 0.05, 0.01, 0.001).
#   alpha = 0.05

# Step 3: state the null and alternative hypotheses.
#   H0: mu_male =  mu_female. English scores do not differ by gender.
#   H1: mu_male != mu_female. English scores differ by gender.

# Step 4: choose the test.
#   Independent-samples t-test.

# Step 5: compute the test statistic.

# First test whether the two groups have equal variances:
# var.test(outcome ~ group). Its H0 is "the variances are equal", so a
# p-value below alpha means the variances are NOT equal.
# The earlier run reported p-value = 0.04522 (< 0.05), i.e. unequal variances.
# Swapping the group order inverts the F ratio but leaves that two-sided
# p-value unchanged.
variance_check <- var.test(english ~ gender, data = grade)
variance_check

# Pick the t-test variant from the F test instead of hard-coding it:
# var.equal = TRUE (pooled) only when equal variances were not rejected,
# otherwise var.equal = FALSE (Welch).
equal_variances <- variance_check$p.value >= 0.05

gender_ttest <- t.test(
  english ~ gender,
  data = grade,
  var.equal = equal_variances,
  alternative = "two.sided"
)
gender_ttest
# The previously pasted output (t = -1.4973, df = 58, p-value = 0.1397) came
# from the pooled test with the group labels swapped and no longer applies;
# re-run this section to obtain the current statistics.
# NOTE(review): reconstructing the group variances from that earlier output
# gives roughly t = 2.3 on about 21 df for the Welch test, i.e. p below 0.05.
# That would overturn the earlier "no significant difference" conclusion.
# Confirm against the re-run output.

# Step 6: read off the p-value.
gender_ttest$p.value

# Step 7: decision.
#   TRUE  -> reject H0: English scores differ significantly by gender.
#   FALSE -> fail to reject H0: no significant difference was found.
gender_ttest$p.value < 0.05


# Q3. Do media-literacy scores differ significantly between departments?
summary(grade$department)
##    Length     Class      Mode 
##        60 character character
str(grade$department)
##  chr [1:60] "J" "S" "S" "A" "J" "P" "J" "J" "A" "J" "P" "J" "A" "J" "A" "P" ...
summary(grade$media)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   42.00   59.00   64.00   64.67   69.25   87.00
str(grade$media)
##  num [1:60] 69 60 62 55 54 68 69 66 74 86 ...

# Step 1: describe the sample (split by group, then aggregate).

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
str(grade)
## 'data.frame':    60 obs. of  5 variables:
##  $ student.id: chr  "student.1" "student.2" "student.3" "student.4" ...
##  $ gender    : Factor w/ 2 levels "男","女": 2 2 2 2 2 2 2 2 2 2 ...
##  $ department: chr  "J" "S" "S" "A" ...
##  $ media     : num  69 60 62 55 54 68 69 66 74 86 ...
##  $ english   : int  14 16 87 33 40 40 10 89 72 82 ...

# Per-department count, mean and standard deviation of the media score.
# Columns are referenced by bare name so that they are evaluated inside each
# group. Writing `grade$media` here would bypass the grouping and report the
# overall mean and sd on every row.
mytable <- grade %>%
  group_by(department) %>%
  summarise(
    次數 = n(),
    平均數 = mean(media),
    標準差 = sd(media)
  )
mytable

# Step 2: choose the significance level (0.1, 0.05, 0.01, 0.001).
#   alpha = 0.05

# Step 3: state the null and alternative hypotheses.
#   H0: media-literacy scores do not differ between departments.
#   H1: media-literacy scores differ between departments.

# Step 4: choose the test.
#   One-way analysis of variance (one-way ANOVA).

# Step 5: compute the test statistic.

model_1w <- aov(media ~ department, data = grade)

summary(model_1w)
##             Df Sum Sq Mean Sq F value Pr(>F)
## department   3    213   70.97   0.913  0.441
## Residuals   56   4352   77.72
# Test statistic: F = 0.913

# Step 6: read off the p-value.
#   p-value = 0.441

# Step 7: decision.
#   p-value (0.441) > 0.05, so H0 cannot be rejected: media-literacy scores
#   show no significant difference between departments.

# Step 8: because H0 was not rejected, no post-hoc comparison is needed.
# Q4. Are male students' English scores lower than female students'?
#     The claim under test (mu_male < mu_female) belongs in H1; H0 is its
#     complement (mu_male >= mu_female).

# Build the gender factor with explicit levels.
# factor() orders levels alphabetically ("female" before "male"), so passing
# only `labels = c("男", "女")` attaches "男" (male) to the female students.
# With the levels spelled out, "男" is the first level, and the one-sided test
# below compares (male - female). The factor is rebuilt from the raw `gender`
# vector so this section does not depend on earlier conversions.
grade$gender <- factor(
  gender,
  levels = c("male", "female"),
  labels = c("男", "女")
)

str(grade$gender)  # the grouping variable has to be a factor
##  Factor w/ 2 levels "男","女": 1 1 1 1 1 1 1 1 1 1 ...
summary(grade$english)  # the outcome has to be numeric
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   23.00   36.50   44.07   72.25   94.00
str(grade$english)
##  int [1:60] 14 16 87 33 40 40 10 89 72 82 ...

# Grouped box plot: plot(categorical variable, continuous variable).
plot(grade$gender, grade$english)

# Analysis steps

# Step 1: describe the sample (group sizes and group means).

summary(grade$gender)
## 男 女 
## 51  9
summary(grade$english)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   23.00   36.50   44.07   72.25   94.00

# Split by group, then aggregate:
# tapply(continuous column, grouping column, summary function)
tapply(grade$english, grade$gender, mean)
##       男       女 
## 46.41176 30.77778

# Step 2: choose the significance level (0.1, 0.05, 0.01, 0.001).
#   alpha = 0.05

# Step 3: state the null and alternative hypotheses.
#   H0: mu_male >= mu_female. Male scores are not lower than female scores.
#   H1: mu_male <  mu_female. Male scores are lower than female scores.

# Step 4: choose the test.
#   Independent-samples t-test, left-tailed.

# Step 5: compute the test statistic.

# First test whether the two groups have equal variances:
# var.test(outcome ~ group). Its H0 is "the variances are equal", so a
# p-value below alpha means the variances are NOT equal.
# The earlier run reported p-value = 0.04522 (< 0.05), i.e. unequal variances.
variance_check <- var.test(english ~ gender, data = grade)
variance_check

# var.equal = TRUE (pooled) only when equal variances were not rejected,
# otherwise var.equal = FALSE (Welch).
equal_variances <- variance_check$p.value >= 0.05

# alternative = "less" tests whether the first level (male) has the smaller
# mean, which is exactly H1.
lower_tail_ttest <- t.test(
  english ~ gender,
  data = grade,
  var.equal = equal_variances,
  alternative = "less"
)
lower_tail_ttest
# The previously pasted output (t = -1.4973, df = 58, p-value = 0.9301) came
# from a pooled, right-tailed test with the group labels swapped; re-run this
# section to obtain the current statistics.

# Step 6: read off the p-value.
lower_tail_ttest$p.value

# Step 7: decision.
#   The male sample mean (46.41) is above the female sample mean (30.78), so
#   the t statistic is positive and the left-tail p-value is greater than 0.5.
#   H0 therefore cannot be rejected at alpha = 0.05: the data give no evidence
#   that male students score lower than female students in English.
lower_tail_ttest$p.value < 0.05

# Q5. Is the school's male-to-female ratio significantly unbalanced?
#     (chi-squared test)

# Step 1: tabulate the observed counts per gender.
x <- table(gender)
x
## gender
## female   male 
##      9     51

# Step 2: choose the significance level.
#   alpha = 0.05

# Step 3: state the null and alternative hypotheses.
#   H0: the proportions of male and female students do not differ.
#   H1: the proportions of male and female students differ.

# Step 4: choose the test.
#   Chi-squared goodness-of-fit test.

# Step 5: compute the test statistic.
# With a one-way table and no `p` argument, chisq.test() compares the counts
# against equal probabilities for every category.
chisq.test(x)
## 
##  Chi-squared test for given probabilities
## 
## data:  x
## X-squared = 29.4, df = 1, p-value = 5.888e-08
# X-squared = 29.4, df = 1.

# Step 6: read off the p-value.
#   p-value = 5.888e-08

# Step 7: decision.
#   p-value (5.888e-08) < 0.05, so H0 is rejected in favour of H1:
#   the male and female proportions differ significantly.

# Q6. Does the male-to-female ratio differ between departments?
#     (test of independence between department and gender)

# Step 1: build the department-by-gender cross table.
y <- table(department, gender)
y
##           gender
## department female male
##          A      2   12
##          J      0   15
##          P      4   13
##          S      3   11

# Step 2: choose the significance level.
#   alpha = 0.05

# Step 3: state the null and alternative hypotheses.
#   H0: the gender ratio does not differ between departments.
#   H1: the gender ratio differs between departments.

# Step 4: choose the test.
#   Chi-squared test of independence, cross-checked with Fisher's exact test.
#   Only 9 of the 60 students are female, so the expected female count in
#   each department (9 * department size / 60) is about 2.1 to 2.6. That is
#   below the usual minimum of 5 for the chi-squared approximation, which is
#   why R prints the warning shown below.

# Step 5: compute the test statistic.

chisq.test(y)
## Warning in chisq.test(y): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  y
## X-squared = 4.0765, df = 3, p-value = 0.2533
# X-squared = 4.0765, df = 3.

# Fisher's exact test does not rely on the large-sample approximation and is
# the appropriate check when expected counts are this small.
exact_check <- fisher.test(y)
exact_check

# Step 6: read off the p-values.
#   Chi-squared p-value = 0.2533 (approximation flagged as unreliable).
#   Exact p-value:
exact_check$p.value

# Step 7: decision.
#   The chi-squared p-value (0.2533) > 0.05 does not reject H0: no difference
#   in gender ratio between departments was found.
#   NOTE(review): base the final statement on the exact test; the line below
#   prints TRUE only if Fisher's test rejects H0 at alpha = 0.05.
exact_check$p.value < 0.05