#과제-문제1
#Data1.csv 파일을 활용합니다.
#EDU(학력수준)별 Happiness 평균차이가 있는지 5% 유의수준으로 검정하시오
#또한 사후분석을 통해서 어느 집단간에 차이가 있는지를 서술하시오
#(단, 사후분석은 method='lsd' 방법을 사용합니다.)
library(dplyr)
## Warning: 패키지 'dplyr'는 R 버전 4.2.3에서 작성되었습니다
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the survey data; Data1.csv is looked up relative to the current
# working directory.
df <- read.csv(file = "Data1.csv")
# Print a compact overview of the columns (names, types, leading values).
df %>% glimpse()
## Rows: 1,925
## Columns: 26
## $ Q1 <int> 4, 4, 4, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, …
## $ Q2 <int> 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 2, 2, …
## $ Q3 <int> 2, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 3, 2, 3, …
## $ Q4 <int> 3, 4, 4, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 2, 4, 4, 4, 2, 2, 4, …
## $ Q5 <int> 4, 4, 2, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 4, 4, 3, 1, 2, …
## $ Q6 <int> 2, 3, 4, 4, 4, 4, 4, 4, 1, 2, 2, 2, 4, 4, 3, 5, 2, 2, 1, 4, …
## $ Q7 <int> 2, 2, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 5, 4, 4, 5, 4, 3, 4, 4, …
## $ Q8 <int> 4, 4, 4, 4, 4, 4, 5, 5, 2, 2, 4, 4, 4, 4, 3, 5, 4, 2, 4, 4, …
## $ Q9 <int> 4, 4, 4, 4, 2, 4, 5, 5, 3, 4, 4, 4, 2, 2, 4, 5, 2, 4, 2, 4, …
## $ Q10 <int> 4, 4, 2, 4, 4, 4, 5, 5, 2, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 3, …
## $ Q11 <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 3, 4, 4, 4, 4, 5, 4, 3, 3, …
## $ Q12 <int> 4, 4, 4, 4, 4, 4, 5, 5, 3, 4, 4, 3, 4, 3, 3, 4, 5, 4, 4, 2, …
## $ Q13 <int> 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 4, 4, 2, 4, 4, 4, 5, 4, 4, 4, …
## $ Q14 <int> 4, 4, 4, 4, 4, 4, 5, 5, 5, 4, 4, 4, 3, 4, 5, 4, 5, 4, 4, 4, …
## $ Q15 <int> 4, 4, 3, 4, 4, 4, 4, 2, 3, 4, 4, 3, 1, 4, 4, 4, 5, 4, 4, 4, …
## $ Q16 <int> 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, 4, 4, 4, 5, 4, 5, 4, 4, 4, …
## $ Q17 <int> 4, 3, 4, 4, 4, 4, 2, 2, 4, 4, 4, 4, 3, 2, 4, 5, 4, 4, 3, 4, …
## $ Q18 <int> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 2, 4, 4, 4, …
## $ Q19 <int> 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 1, 4, 4, 4, 5, 4, 2, 3, …
## $ Q20 <int> 4, 1, 3, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 4, 5, 5, 4, 2, 4, …
## $ Gender <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, …
## $ EDU <int> 1, 1, 2, 1, 2, 1, 1, 1, 4, 3, 2, 1, 1, 3, 3, 2, 1, 1, 1, 4, …
## $ BF <dbl> 3.4, 4.0, 3.6, 4.2, 4.0, 4.0, 3.6, 3.6, 3.6, 3.2, 4.0, 3.2, …
## $ BM <dbl> 3.2, 3.4, 3.6, 4.0, 3.6, 4.0, 4.6, 4.6, 2.2, 3.2, 3.2, 3.6, …
## $ Happiness <dbl> 4.0, 4.0, 3.8, 4.0, 4.0, 4.0, 4.8, 4.4, 3.8, 4.0, 4.0, 3.4, …
## $ Peace <dbl> 4.0, 2.8, 3.8, 4.0, 4.0, 4.0, 3.8, 2.4, 4.0, 3.2, 4.0, 3.9, …
# Coerce the Happiness column to numeric; it is the response variable of the
# ANOVA fitted later in this script.
df[["Happiness"]] <- as.numeric(df[["Happiness"]])
# Analysis ----
# Test at the 5% significance level whether mean Happiness differs across
# EDU (education level) groups.
# First, check how many observations fall into each education level.
count(df, EDU)
## EDU n
## 1 1 233
## 2 2 472
## 3 3 1022
## 4 4 198
# One-way ANOVA of Happiness on education level. EDU is read in as an integer
# code, so it is wrapped in factor() to be treated as a grouping variable
# rather than as a numeric predictor. The fitted model is kept in `m` because
# the post-hoc test further down takes it as input.
m <- aov(Happiness ~ factor(EDU), data = df)
# Print the ANOVA table (F statistic and p-value for the factor(EDU) term).
summary(m)
## Df Sum Sq Mean Sq F value Pr(>F)
## factor(EDU) 3 4.9 1.6346 2.93 0.0325 *
## Residuals 1921 1071.6 0.5578
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#결과 : Pr(>F)=0.0325 < 0.05 이므로 귀무가설(학력수준별 Happiness 평균이 모두 같다)을 기각한다. 즉, 5% 유의수준에서 학력수준에 따라 Happiness 평균에 차이가 있다.
#사후분석
#사후분석을 통해서 어느 집단간에 차이가 있는지를 서술하시오
library(DescTools)
## Warning: 패키지 'DescTools'는 R 버전 4.2.3에서 작성되었습니다
# Pairwise post-hoc comparisons of the education-level group means from the
# fitted ANOVA model `m`, using Fisher's LSD method.
PostHocTest(m, method = "lsd")
##
## Posthoc multiple comparisons of means : Fisher LSD
## 95% family-wise confidence level
##
## $`factor(EDU)`
## diff lwr.ci upr.ci pval
## 2-1 -0.11127155 -0.228550079 0.006006979 0.0629 .
## 3-1 -0.03552111 -0.141859816 0.070817594 0.5125
## 4-1 0.06330472 -0.078274980 0.204884422 0.3806
## 3-2 0.07575044 -0.005767232 0.157268111 0.0685 .
## 4-2 0.17457627 0.050551990 0.298600553 0.0058 **
## 4-3 0.09882583 -0.014909339 0.212561003 0.0885 .
##
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#결과 : 4-2 0.17457627 0.050551990 0.298600553 0.0058 **
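#4-2 : 학력수준 2 집단과 4 집단 간의 Happiness 평균 차이는 5% 유의수준에서 유의하다 (diff = 0.175, p-value = 0.0058). 학력수준 4 집단의 평균이 더 높으며, 나머지 집단 쌍은 모두 p-value > 0.05 로 유의한 차이가 없다.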