01_thesis

# Load the survey responses.
# The file appears to start with a UTF-8 byte-order mark: without an explicit
# encoding the first column name came back garbled. "UTF-8-BOM" strips the
# mark so the column is read under its real name.
df <- read.csv("df.csv", fileEncoding = "UTF-8-BOM")
names(df)

##  [1] "num"         "headcount"   "composition" "Sex"         "Age"        
##  [6] "Career"      "Task"        "Edu"         "smoke"       "disease"    
## [11] "harm"        "edu_imp"     "edu_charge"  "edu_time"    "edu_need"   
## [16] "test"        "place"       "clean_use"   "clean_no"    "clean"      
## [21] "skill"       "recruit"     "system_need" "rule_1"      "rule_2"     
## [26] "rule_3"      "rule_4"      "rule_5"      "tail_29_1"   "tail_29_2"  
## [31] "tail_29_3"   "tail_29_4"   "tail_29_5"   "comply_1"    "comply_2"   
## [36] "comply_3"    "comply_4"    "comply_5"    "tail_31_1"   "tail_31_2"  
## [41] "tail_31_3"   "tail_31_4"   "tail_31_5"   "rule_effect" "sales"      
## [46] "year_no_1"   "year_no_2"   "part_1"      "part_2"      "part_3"     
## [51] "part_4"

# Keep only the two variables used in the analysis (Career and harm).
# They are selected by name rather than by position so the subset does not
# silently pick up different columns if the layout of df.csv changes.
df1 <- df[, c("Career", "harm")]
str(df1)

## 'data.frame':    115 obs. of  2 variables:
##  $ Career: int  5 3 3 3 2 5 3 2 1 4 ...
##  $ harm  : int  1 1 2 2 2 5 2 1 1 1 ...

# Treat Career as a categorical variable, then count the rows in each level.
df1[["Career"]] <- as.factor(df1[["Career"]])
table(df1[["Career"]])

## 
##  1  2  3  4  5 15 
## 19 46 27 13  9  1

# Preview the first six rows of the analysis subset.
head(df1, n = 6L)

##   Career harm
## 1      5    1
## 2      3    1
## 3      3    2
## 4      3    2
## 5      2    2
## 6      5    5

library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.2     v dplyr   1.0.6
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1

## Warning: 패키지 'ggplot2'는 R 버전 4.1.3에서 작성되었습니다

## Warning: 패키지 'tidyr'는 R 버전 4.1.3에서 작성되었습니다

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

# Drop the row(s) whose Career is coded "15", then fit a linear model of harm
# on Career and print its ANOVA table.
df2 <- filter(df1, Career != "15")
ga.out1 <- lm(formula = harm ~ Career, data = df2)
anova(ga.out1)

## Analysis of Variance Table
## 
## Response: harm
##            Df  Sum Sq Mean Sq F value Pr(>F)
## Career      4   6.697  1.6742  1.5589 0.1904
## Residuals 109 117.058  1.0739

# Print the coefficient table, residual summary and overall F-test for the
# harm ~ Career model fitted above.
summary(ga.out1)

## 
## Call:
## lm(formula = harm ~ Career, data = df2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3333 -0.6522 -0.4298  0.3478  3.3478 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.5263     0.2377   6.420 3.65e-09 ***
## Career2       0.1259     0.2826   0.445   0.6570    
## Career3       0.5107     0.3103   1.646   0.1027    
## Career4       0.3968     0.3730   1.064   0.2898    
## Career5       0.8070     0.4193   1.924   0.0569 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.036 on 109 degrees of freedom
## Multiple R-squared:  0.05411,    Adjusted R-squared:  0.0194 
## F-statistic: 1.559 on 4 and 109 DF,  p-value: 0.1904

library(multcomp)

## Warning: 패키지 'multcomp'는 R 버전 4.1.2에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: mvtnorm

## 필요한 패키지를 로딩중입니다: survival

## 필요한 패키지를 로딩중입니다: TH.data

## Warning: 패키지 'TH.data'는 R 버전 4.1.2에서 작성되었습니다

## 필요한 패키지를 로딩중입니다: MASS

## 
## 다음의 패키지를 부착합니다: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## 
## 다음의 패키지를 부착합니다: 'TH.data'

## The following object is masked from 'package:MASS':
## 
##     geyser

# Pairwise Tukey contrasts between the Career levels of the fitted model;
# summary() reports the tests with adjusted p-values.
ga.out2 <- glht(
  model = ga.out1,
  linfct = mcp(Career = "Tukey")
)
summary(ga.out2)

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lm(formula = harm ~ Career, data = df2)
## 
## Linear Hypotheses:
##            Estimate Std. Error t value Pr(>|t|)
## 2 - 1 == 0   0.1259     0.2826   0.445    0.991
## 3 - 1 == 0   0.5107     0.3103   1.646    0.461
## 4 - 1 == 0   0.3968     0.3730   1.064    0.818
## 5 - 1 == 0   0.8070     0.4193   1.924    0.301
## 3 - 2 == 0   0.3849     0.2512   1.532    0.534
## 4 - 2 == 0   0.2709     0.3255   0.832    0.917
## 5 - 2 == 0   0.6812     0.3777   1.803    0.367
## 4 - 3 == 0  -0.1140     0.3498  -0.326    0.997
## 5 - 3 == 0   0.2963     0.3989   0.743    0.943
## 5 - 4 == 0   0.4103     0.4494   0.913    0.887
## (Adjusted p values reported -- single-step method)

01_thesis

0409

2022 4 13