library(moonBook)
## Warning: package 'moonBook' was built under R version 4.0.4
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(ggplot2)
library(ggeffects)
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(broom)
library(descr)
load("C:/RRR/kgss/kgsslogistic_210304.RData")

step1. 기본 회귀식

reg1 <- glm(T_harrassment1 ~ r_gender 
            + r_workinghour 
            + r_age_c 
            + r_job
            + r_jobclass
            + r_gender*r_workinghour
            , family = binomial(link = "logit")
            , data = MD2, na.action = na.omit)
car::vif(reg1) 
## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4
##                             GVIF Df GVIF^(1/(2*Df))
## r_gender               19.214010  1        4.383379
## r_workinghour           3.164958  1        1.779033
## r_age_c                 1.926282  3        1.115458
## r_job                   1.228188  1        1.108236
## r_jobclass              2.283512  7        1.060754
## r_gender:r_workinghour 20.713786  1        4.551240

다중공선성 문제 발생 따라서 r_workinghour 변수에 대해서 중심화 작업 필요

MD2$r_workinghour1 <- MD2$r_workinghour - mean(MD2$r_workinghour, na.rm = T)

중심화 이후 다시 로짓 분석 실시

reg1 <- glm(T_harrassment1 ~ r_gender 
            + r_workinghour1 
            + r_age_c 
            + r_job
            + r_jobclass
            + r_gender*r_workinghour1
            , family = binomial(link = "logit")
            , data = MD2, na.action = na.omit)
summary(reg1)
## 
## Call:
## glm(formula = T_harrassment1 ~ r_gender + r_workinghour1 + r_age_c + 
##     r_job + r_jobclass + r_gender * r_workinghour1, family = binomial(link = "logit"), 
##     data = MD2, na.action = na.omit)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8918  -0.8256  -0.5094   0.9172   2.3819  
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -1.51746    0.55160  -2.751  0.00594 ** 
## r_gender여성                 1.73844    0.23201   7.493 6.73e-14 ***
## r_workinghour1              -0.04796    0.07158  -0.670  0.50285    
## r_age_c30s                  -0.20663    0.27640  -0.748  0.45471    
## r_age_c40s                  -0.73527    0.37588  -1.956  0.05045 .  
## r_age_c50s 이상             -1.56017    0.73174  -2.132  0.03300 *  
## r_job                       -0.17941    0.04602  -3.899 9.66e-05 ***
## r_jobclass감독급/기사        0.78156    0.50389   1.551  0.12089    
## r_jobclass실장,팀장,1st      0.97490    0.52313   1.864  0.06238 .  
## r_jobclass2nd                0.64473    0.57162   1.128  0.25936    
## r_jobclass3rd이하            1.21636    0.56845   2.140  0.03237 *  
## r_jobclass수습, 인턴         0.66683    0.89511   0.745  0.45629    
## r_jobclass사원               0.47737    0.60081   0.795  0.42688    
## r_jobclass기타               1.14298    0.65707   1.739  0.08195 .  
## r_gender여성:r_workinghour1  0.18008    0.08612   2.091  0.03653 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 691.77  on 523  degrees of freedom
## Residual deviance: 568.71  on 509  degrees of freedom
##   (311 observations deleted due to missingness)
## AIC: 598.71
## 
## Number of Fisher Scoring iterations: 4
car::vif(reg1)
##                             GVIF Df GVIF^(1/(2*Df))
## r_gender                1.133590  1        1.064702
## r_workinghour1          3.164958  1        1.779033
## r_age_c                 1.926282  3        1.115458
## r_job                   1.228188  1        1.108236
## r_jobclass              2.283512  7        1.060754
## r_gender:r_workinghour1 3.048012  1        1.745856
moonBook::ORplot(reg1,type=2,show.CI=T,main="Main Title", sig.level=0.05, pch=1) #유의한 변수만

moonBook::ORplot(reg1,type=2,show.CI=T,main="Main Title", pch=1) #유의하지 않은 변수까지

stargazer(reg1, type = "text")
## 
## =====================================================
##                               Dependent variable:    
##                           ---------------------------
##                                 T_harrassment1       
## -----------------------------------------------------
## r_gender여성                         1.738***          
##                                     (0.232)          
##                                                      
## r_workinghour1                      -0.048           
##                                     (0.072)          
##                                                      
## r_age_c30s                          -0.207           
##                                     (0.276)          
##                                                      
## r_age_c40s                          -0.735*          
##                                     (0.376)          
##                                                      
## r_age_c50s 이상                      -1.560**          
##                                     (0.732)          
##                                                      
## r_job                              -0.179***         
##                                     (0.046)          
##                                                      
## r_jobclass감독급/기사                     0.782           
##                                     (0.504)          
##                                                      
## r_jobclass실장,팀장,1st                 0.975*           
##                                     (0.523)          
##                                                      
## r_jobclass2nd                        0.645           
##                                     (0.572)          
##                                                      
## r_jobclass3rd이하                     1.216**          
##                                     (0.568)          
##                                                      
## r_jobclass수습, 인턴                     0.667           
##                                     (0.895)          
##                                                      
## r_jobclass사원                         0.477           
##                                     (0.601)          
##                                                      
## r_jobclass기타                        1.143*           
##                                     (0.657)          
##                                                      
## r_gender여성:r_workinghour1           0.180**          
##                                     (0.086)          
##                                                      
## Constant                           -1.517***         
##                                     (0.552)          
##                                                      
## -----------------------------------------------------
## Observations                          524            
## Log Likelihood                     -284.355          
## Akaike Inf. Crit.                   598.710          
## =====================================================
## Note:                     *p<0.1; **p<0.05; ***p<0.01

문제 없으면 이제 오즈비로 변환!

reg1_logit <- cbind(OR = coef(reg1), confint(reg1)) #이건 위 회귀계수값(로짓)이랑 똑같이 나옴.
## Waiting for profiling to be done...

logit에서 log를 없애서 오즈비를 구해보자

reg1_odds_ratio <- exp(reg1_logit) #오즈비 보기

reg1_odds_ratio
##                                    OR      2.5 %     97.5 %
## (Intercept)                 0.2192676 0.07036300  0.6222022
## r_gender여성                5.6884636 3.64222984  9.0577490
## r_workinghour1              0.9531745 0.82550289  1.0935598
## r_age_c30s                  0.8133208 0.47226279  1.3985765
## r_age_c40s                  0.4793747 0.22667642  0.9935245
## r_age_c50s 이상             0.2100997 0.04172356  0.7923760
## r_job                       0.8357613 0.76200922  0.9129571
## r_jobclass감독급/기사       2.1848780 0.84601171  6.2284540
## r_jobclass실장,팀장,1st     2.6508999 0.98619321  7.8193609
## r_jobclass2nd               1.9054784 0.63932716  6.1194192
## r_jobclass3rd이하           3.3748674 1.14492431 10.8132278
## r_jobclass수습, 인턴        1.9480606 0.30565982 11.0111508
## r_jobclass사원              1.6118241 0.50831069  5.4519332
## r_jobclass기타              3.1360938 0.88115558 11.7906017
## r_gender여성:r_workinghour1 1.1973170 1.01400471  1.4219475

Step2. 이제 그림그리기!

다음을 참고함!

그림1 p로 그리기

model1 <- broom::augment(reg1, type.predict = "response") %>%
  dplyr::mutate(y_hat = .fitted)
View(model1) # model이라는 새로운 데이터 프레임이 생긴다
model1  %>% filter(r_workinghour1 >= -9.8 & r_workinghour1 <= 13.4) %>%
  ggplot(aes(x = r_workinghour1, y = y_hat, col = r_gender)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_color_brewer(palette = "Dark2")+ 
  labs(title = "성희롱, 성폭력 피해 - 성별과 노동시간의 상호작용 효과"
       , subtitle = ""
       , x = "One Day_working hours"
       , y = "Probability"
       , col = "GENDER")+
  theme_bw()+ 
  scale_x_continuous(breaks = c(-10,-5,0,5,10,14), labels=c(0,5,10,15,20,24))
## `geom_smooth()` using formula 'y ~ x'

그림2 odds ratio로 그리기

model2 <- augment(reg1, type.predict = "response") %>% 
  mutate(y_hat = .fitted) %>%  
  mutate(odds = y_hat / (1-y_hat))
ggplot(model2) +
  aes(x = r_workinghour1, y = odds, colour = r_gender) +
  geom_point(size = 1L) +
  geom_smooth(span = 0.75) +
  scale_color_brewer(palette = "Dark2") +
  theme_bw()+ 
  labs(title = "성희롱, 성폭력 피해 ~ 성별과 노동시간의 상호작용 효과"
       , subtitle = ""
       , x = "One Day_working hours"
       , y = "Odds Ratio"
       , col = "GENDER")+
  scale_x_continuous(breaks = c(-10,-5,0,5,10,14), labels=c(0,5,10,15,20,24))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

그림3 MEM으로 그리기

model3 <- ggpredict(reg1, terms = c("r_workinghour1", "r_gender"))
## Data were 'prettified'. Consider using `terms="r_workinghour1 [all]"` to get smooth plots.
ggplot(model3) +
  aes(x = x, y = predicted, colour = group) +
  geom_line(size = 1L, lty = 2) +
  scale_color_brewer(palette = "Dark2") +
  theme_bw()+ 
  theme_bw()+ 
  labs(title = "성희롱, 성폭력 피해 ~ 성별과 노동시간의 상호작용 효과"
       , subtitle = ""
       , x = "One Day_working hours"
       , y = "Predicted"
       , col = "GENDER")+
  scale_x_continuous(breaks = c(-10,-5,0,5,10,14), labels=c(0,5,10,15,20,24))+
  facet_wrap(vars(group))

그림4 MEM으로 그리기2

plot(Effect(focal.predictors = c("r_workinghour1", "r_gender"), reg1))