#과제-문제1
#Data1.csv 파일을 활용합니다.
#EDU(학력수준)별 Happiness 평균차이가 있는지 5% 유의수준으로 검정하시오
#또한 사후분석을 통해서 어느 집단간에 차이가 있는지를 서술하시오
#(단, 사후분석은 method='lsd")방법을 사용합니다.

library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#데이터 불러오기
df<-read.csv("Data1.csv")
glimpse(df)
## Rows: 1,925
## Columns: 26
## $ Q1        <int> 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ Q2        <int> 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 2, …
## $ Q3        <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, …
## $ Q4        <int> 3, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, …
## $ Q5        <int> 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 1, 2, …
## $ Q6        <int> 2, 3, 4, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 1, 4, …
## $ Q7        <int> 2, 2, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, …
## $ Q8        <int> 4, 4, 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, …
## $ Q9        <int> 4, 4, 4, 4, 2, 4, 5, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 2, 4, …
## $ Q10       <int> 4, 4, 2, 4, 4, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 3, …
## $ Q11       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 3, …
## $ Q12       <int> 4, 4, 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 4, 2, …
## $ Q13       <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 4, …
## $ Q14       <int> 4, 4, 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, …
## $ Q15       <int> 4, 4, 3, 4, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 4, …
## $ Q16       <int> 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, …
## $ Q17       <int> 4, 3, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 3, 4, …
## $ Q18       <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, …
## $ Q19       <int> 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 2, 3, …
## $ Q20       <int> 4, 1, 3, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 2, 4, …
## $ Gender    <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, …
## $ EDU       <int> 1, 1, 2, 1, 2, 1, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 1, 4, …
## $ BF        <dbl> 3.4, 4.0, 3.6, 4.2, 4.0, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, …
## $ BM        <dbl> 3.2, 3.4, 3.6, 4.0, 3.6, 4.0, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, …
## $ Happiness <dbl> 4.0, 4.0, 3.8, 4.0, 4.0, 4.0, 4.8, 4.4, 3.8, 4.0, 4.0, 3.4, …
## $ Peace     <dbl> 4.0, 2.8, 3.8, 4.0, 4.0, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, …
#Happiness 변수를 숫자형 데이터로 변환하기
df$Happiness <- as.numeric(df$Happiness)

#분석
#EDU(학력수준)별 Happiness 평균차이가 있는지 5% 유의수준으로 검정하시오
#학력수준별 빈도수 확인하기
df %>% count(EDU)
##   EDU    n
## 1   1  233
## 2   2  472
## 3   3 1022
## 4   4  198
m <- aov(Happiness ~ factor(EDU), data = df)
summary(m)
##               Df Sum Sq Mean Sq F value Pr(>F)  
## factor(EDU)    3    4.9  1.6346    2.93 0.0325 *
## Residuals   1921 1071.6  0.5578                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#결과 : Pr(>F)=0.0325 < 0.05 즉, 귀무가설 기각

#사후분석
#사후분석을 통해서 어느 집단간에 차이가 있는지를 서술하시오

library(DescTools)
## Warning: 패키지 'DescTools'는 R 버전 4.2.3에서 작성되었습니다
PostHocTest(m,method='lsd')
## 
##   Posthoc multiple comparisons of means : Fisher LSD 
##     95% family-wise confidence level
## 
## $`factor(EDU)`
##            diff       lwr.ci      upr.ci   pval    
## 2-1 -0.11127155 -0.228550079 0.006006979 0.0629 .  
## 3-1 -0.03552111 -0.141859816 0.070817594 0.5125    
## 4-1  0.06330472 -0.078274980 0.204884422 0.3806    
## 3-2  0.07575044 -0.005767232 0.157268111 0.0685 .  
## 4-2  0.17457627  0.050551990 0.298600553 0.0058 ** 
## 4-3  0.09882583 -0.014909339 0.212561003 0.0885 .  
## 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#결과 : 4-2  0.17457627  0.050551990 0.298600553 0.0058 ** 
#4-2 : 2학년과 4학년 간의 평균 차이는 유의하다 (p-value = 0.0058)