# Load the survey responses. The previously recorded header showed the first
# column name with garbage characters fused onto "num", which is what a UTF-8
# byte-order mark looks like when the file is decoded in a non-UTF-8 locale.
# Reading with "UTF-8-BOM" strips the mark so the column is simply "num".
df <- read.csv("df.csv", fileEncoding = "UTF-8-BOM")
# List the column names to confirm the header was parsed cleanly.
names(df)
## [1] "num" "headcount" "composition" "Sex" "Age"
## [6] "Career" "Task" "Edu" "smoke" "disease"
## [11] "harm" "edu_imp" "edu_charge" "edu_time" "edu_need"
## [16] "test" "place" "clean_use" "clean_no" "clean"
## [21] "skill" "recruit" "system_need" "rule_1" "rule_2"
## [26] "rule_3" "rule_4" "rule_5" "tail_29_1" "tail_29_2"
## [31] "tail_29_3" "tail_29_4" "tail_29_5" "comply_1" "comply_2"
## [36] "comply_3" "comply_4" "comply_5" "tail_31_1" "tail_31_2"
## [41] "tail_31_3" "tail_31_4" "tail_31_5" "rule_effect" "sales"
## [46] "year_no_1" "year_no_2" "part_1" "part_2" "part_3"
## [51] "part_4"
# Keep only the two variables used in the analysis below: Career (the grouping
# variable) and harm (the response). Columns are picked by name rather than by
# position (6 and 11) so the selection survives any change in column order.
df1 <- df[, c("Career", "harm")]
# Check the type of each retained column.
str(df1)
## 'data.frame': 115 obs. of 2 variables:
## $ Career: int 5 3 3 3 2 5 3 2 1 4 ...
## $ harm : int 1 1 2 2 2 5 2 1 1 1 ...
# Treat Career as a categorical grouping variable rather than a number.
df1[["Career"]] <- factor(df1[["Career"]])
# Count the respondents in each Career category.
table(df1[["Career"]])
##
## 1 2 3 4 5 15
## 19 46 27 13 9 1
# Preview the first six rows of the working data.
head(df1, n = 6L)
## Career harm
## 1 5 1
## 2 3 1
## 3 3 2
## 4 3 2
## 5 2 2
## 6 5 5
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.6
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## Warning: 패키지 'ggplot2'는 R 버전 4.1.3에서 작성되었습니다
## Warning: 패키지 'tidyr'는 R 버전 4.1.3에서 작성되었습니다
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Exclude the single respondent coded Career == 15, which lies outside the
# 1-5 codes used by every other row (presumably a data-entry error -- TODO
# confirm against the questionnaire). droplevels() then removes the emptied
# "15" level so it cannot reappear as a zero-count group in later tables,
# plots, or contrasts built from df2.
df2 <- df1 %>%
  filter(Career != "15") %>%
  droplevels()
# One-way ANOVA of harm on Career, fitted as a linear model.
ga.out1 <- lm(harm ~ Career, data = df2)
# Overall F test for differences in mean harm across Career groups.
anova(ga.out1)
## Analysis of Variance Table
##
## Response: harm
## Df Sum Sq Mean Sq F value Pr(>F)
## Career 4 6.697 1.6742 1.5589 0.1904
## Residuals 109 117.058 1.0739
# Coefficient table (each Career level against the baseline) and fit summary.
summary(object = ga.out1)
##
## Call:
## lm(formula = harm ~ Career, data = df2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3333 -0.6522 -0.4298 0.3478 3.3478
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.5263 0.2377 6.420 3.65e-09 ***
## Career2 0.1259 0.2826 0.445 0.6570
## Career3 0.5107 0.3103 1.646 0.1027
## Career4 0.3968 0.3730 1.064 0.2898
## Career5 0.8070 0.4193 1.924 0.0569 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.036 on 109 degrees of freedom
## Multiple R-squared: 0.05411, Adjusted R-squared: 0.0194
## F-statistic: 1.559 on 4 and 109 DF, p-value: 0.1904
library(multcomp)
## Warning: 패키지 'multcomp'는 R 버전 4.1.2에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: mvtnorm
## 필요한 패키지를 로딩중입니다: survival
## 필요한 패키지를 로딩중입니다: TH.data
## Warning: 패키지 'TH.data'는 R 버전 4.1.2에서 작성되었습니다
## 필요한 패키지를 로딩중입니다: MASS
##
## 다음의 패키지를 부착합니다: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## 다음의 패키지를 부착합니다: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
# All pairwise (Tukey) comparisons of mean harm between Career groups.
ga.out2 <- glht(ga.out1, linfct = mcp(Career = "Tukey"))
# The single-step adjusted p-values are obtained by numerical integration of a
# multivariate t distribution using a randomized algorithm, so they can vary
# slightly from run to run. Fix the seed to make the table reproducible.
set.seed(1234)
summary(ga.out2)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lm(formula = harm ~ Career, data = df2)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 2 - 1 == 0 0.1259 0.2826 0.445 0.991
## 3 - 1 == 0 0.5107 0.3103 1.646 0.461
## 4 - 1 == 0 0.3968 0.3730 1.064 0.818
## 5 - 1 == 0 0.8070 0.4193 1.924 0.301
## 3 - 2 == 0 0.3849 0.2512 1.532 0.534
## 4 - 2 == 0 0.2709 0.3255 0.832 0.917
## 5 - 2 == 0 0.6812 0.3777 1.803 0.367
## 4 - 3 == 0 -0.1140 0.3498 -0.326 0.997
## 5 - 3 == 0 0.2963 0.3989 0.743 0.943
## 5 - 4 == 0 0.4103 0.4494 0.913 0.887
## (Adjusted p values reported -- single-step method)