#load package

pacman::p_load(tidyverse, MASS, nnet)

#input data

dta <- read.table("youth_employment.txt", h=T)

head(dta)
##   Employment   Race     Family    DadEdu Income Local   ASAB
## 1       Work Others Non-Intact Otherwise 0.4909   6.9 -0.110
## 2     School Others Non-Intact Otherwise 0.6940   4.8  0.452
## 3       Work Others  Otherwise Otherwise 1.1000   6.5  0.967
## 4       Work Others Non-Intact   College 1.5000   3.8  1.667
## 5       Work Others Non-Intact Otherwise 0.2544   6.9  0.000
## 6     School Others Non-Intact Otherwise 0.9391   5.4  0.000
str(dta)
## 'data.frame':    978 obs. of  7 variables:
##  $ Employment: chr  "Work" "School" "Work" "Work" ...
##  $ Race      : chr  "Others" "Others" "Others" "Others" ...
##  $ Family    : chr  "Non-Intact" "Non-Intact" "Otherwise" "Non-Intact" ...
##  $ DadEdu    : chr  "Otherwise" "Otherwise" "Otherwise" "College" ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...

1 data management

dta$ID <- paste(1:978)
dta$Race <- relevel(factor(dta$Race), ref="Others")
dta$Employment <- relevel(factor(dta$Employment), ref="Idle")
dta$Family <- relevel(factor(dta$Family), ref="Otherwise")
dta$DadEdu <- relevel(factor(dta$DadEdu), ref="Otherwise")

##1 ntile的切法

dta <- dta %>% mutate(Incf = ntile(Income, 3),
                      ASABf = ntile(ASAB, 3),
                      Localf  = ntile(Local, 2),
                      Work = ifelse(dta$Employment == "Work",1,0),
                      School = ifelse(dta$Employment == "School",1,0),
                      Idle = ifelse(dta$Employment == "Idle",1,0))

dta$Incf <- factor(dta$Incf, 1:3, labels = c("Low","Middle","High"))
dta$ASABf <- factor(dta$ASABf, 1:3, labels = c("Low","Middle","High"))
dta$Localf <- factor(dta$Localf, 1:2, labels = c("Low","High"))
str(dta)
## 'data.frame':    978 obs. of  14 variables:
##  $ Employment: Factor w/ 3 levels "Idle","School",..: 3 2 3 3 3 2 3 2 2 3 ...
##  $ Race      : Factor w/ 2 levels "Others","Black": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Family    : Factor w/ 2 levels "Otherwise","Non-Intact": 2 2 1 2 2 2 1 1 2 1 ...
##  $ DadEdu    : Factor w/ 2 levels "Otherwise","College": 1 1 1 2 1 1 1 2 2 2 ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...
##  $ ID        : chr  "1" "2" "3" "4" ...
##  $ Incf      : Factor w/ 3 levels "Low","Middle",..: 1 2 3 3 1 3 3 3 3 3 ...
##  $ ASABf     : Factor w/ 3 levels "Low","Middle",..: 2 3 3 3 2 2 2 3 3 3 ...
##  $ Localf    : Factor w/ 2 levels "Low","High": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Work      : num  1 0 1 1 1 0 1 0 0 1 ...
##  $ School    : num  0 1 0 0 0 1 0 1 1 0 ...
##  $ Idle      : num  0 0 0 0 0 0 0 0 0 0 ...
aggregate(cbind(Work, School, Idle) ~ Race + Family + DadEdu, data=dta, FUN = sum)
##     Race     Family    DadEdu Work School Idle
## 1 Others  Otherwise Otherwise  141    161   84
## 2  Black  Otherwise Otherwise   26     60   28
## 3 Others Non-Intact Otherwise   54     43   47
## 4  Black Non-Intact Otherwise   27     40   39
## 5 Others  Otherwise   College   72     66   23
## 6  Black  Otherwise   College    3      8    6
## 7 Others Non-Intact   College   18     12    5
## 8  Black Non-Intact   College    2     10    3
aggregate(cbind(Idle, School, Work) ~ Incf + ASABf + Localf, data = dta, FUN = sum)
##      Incf  ASABf Localf Idle School Work
## 1     Low    Low    Low   21     33   17
## 2  Middle    Low    Low   10     19   21
## 3    High    Low    Low    6      8   10
## 4     Low Middle    Low   13     17   10
## 5  Middle Middle    Low   19     24   28
## 6    High Middle    Low   17     21   17
## 7     Low   High    Low    2     11    8
## 8  Middle   High    Low    7     20   22
## 9    High   High    Low   11     42   55
## 10    Low    Low   High   48     41   22
## 11 Middle    Low   High   14     26   14
## 12   High    Low   High    4      7    5
## 13    Low Middle   High   19     32   16
## 14 Middle Middle   High    9     24   19
## 15   High Middle   High    7     18   16
## 16    Low   High   High    5      8    3
## 17 Middle   High   High   10     19   21
## 18   High   High   High   13     30   39

##2 cut quantile切法

dta$Incf2<- factor(cut(dta$Income, 
                       breaks=c(quantile(dta$Income, probs=seq(0, 1, by=.33))),
                       labels=c("Low", "Middle","High"), ordered=T, lowest.inclued=T))
dta$Localf2<- factor(cut(dta$Local, 
                       breaks=c(quantile(dta$Local, probs=seq(0, 1, by=.5))),
                       labels=c("Low","High"), ordered=T, lowest.inclued=T))
dta$ASABf2<- factor(cut(dta$ASAB, 
                       breaks=c(quantile(dta$ASAB, probs=seq(0, 1, by=.33))),
                       labels=c("Low", "Middle","High"), ordered=T, lowest.inclued=T))
str(dta)
## 'data.frame':    978 obs. of  17 variables:
##  $ Employment: Factor w/ 3 levels "Idle","School",..: 3 2 3 3 3 2 3 2 2 3 ...
##  $ Race      : Factor w/ 2 levels "Others","Black": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Family    : Factor w/ 2 levels "Otherwise","Non-Intact": 2 2 1 2 2 2 1 1 2 1 ...
##  $ DadEdu    : Factor w/ 2 levels "Otherwise","College": 1 1 1 2 1 1 1 2 2 2 ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...
##  $ ID        : chr  "1" "2" "3" "4" ...
##  $ Incf      : Factor w/ 3 levels "Low","Middle",..: 1 2 3 3 1 3 3 3 3 3 ...
##  $ ASABf     : Factor w/ 3 levels "Low","Middle",..: 2 3 3 3 2 2 2 3 3 3 ...
##  $ Localf    : Factor w/ 2 levels "Low","High": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Work      : num  1 0 1 1 1 0 1 0 0 1 ...
##  $ School    : num  0 1 0 0 0 1 0 1 1 0 ...
##  $ Idle      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Incf2     : Ord.factor w/ 3 levels "Low"<"Middle"<..: 2 2 3 3 1 3 3 3 3 3 ...
##  $ Localf2   : Ord.factor w/ 2 levels "Low"<"High": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ASABf2    : Ord.factor w/ 3 levels "Low"<"Middle"<..: 2 3 3 NA 2 2 2 3 3 3 ...
with(dta,ftable(Race,Family, DadEdu,Employment))
##                             Employment Idle School Work
## Race   Family     DadEdu                               
## Others Otherwise  Otherwise              84    161  141
##                   College                23     66   72
##        Non-Intact Otherwise              47     43   54
##                   College                 5     12   18
## Black  Otherwise  Otherwise              28     60   26
##                   College                 6      8    3
##        Non-Intact Otherwise              39     40   27
##                   College                 3     10    2
a<-as.data.frame(with(dta,ftable(Race,Family, DadEdu,Employment)))
b <- as.data.frame(with(dta, ftable(Incf2, ASABf2, Localf2, Employment)))
with(dta, ftable(Incf2,ASABf2,Localf2, Employment))
##                       Employment Idle School Work
## Incf2  ASABf2 Localf2                            
## Low    Low    Low                  26     39   19
##               High                 42     34   19
##        Middle Low                  21     22   15
##               High                 11     26   10
##        High   Low                   3     13    8
##               High                  4      5    3
## Middle Low    Low                  13     20   24
##               High                 11     24   10
##        Middle Low                  22     28   36
##               High                  6     16   13
##        High   Low                   9     22   29
##               High                  9     19   12
## High   Low    Low                   6      9   10
##               High                  4      6    5
##        Middle Low                  18     23   16
##               High                  4     16   15
##        High   Low                  15     40   59
##               High                 11     26   27

2 plot freq

with(dta, table(Employment, Race))
##           Race
## Employment Others Black
##     Idle      159    76
##     School    282   118
##     Work      285    58
with(dta, table(Employment, DadEdu))
##           DadEdu
## Employment Otherwise College
##     Idle         198      37
##     School       304      96
##     Work         248      95
HH::likert(t(with(dta, table(Employment, Race))), 
           main="", 
           ylab="Race", 
           as.percent=TRUE)

HH::likert(t(with(dta, table(Employment, DadEdu))), 
           main="", 
           ylab="DadEdu", 
           as.percent=TRUE)

3 Model

3.1 Employment variable separate

dta_fn <- subset(dta, Employment != 'School') %>% mutate(resp = ifelse(Employment=='Work', 1, 0))
dta_pn <- subset(dta, Employment != 'Work') %>% mutate(resp = ifelse(Employment=='School', 1, 0))
m_fn <- glm(resp ~ Race+Family+DadEdu+Income+Local+ASAB, data=dta_fn, family=binomial)
m_pn <- glm(resp ~ Race+Family+DadEdu+Income+Local+ASAB, data=dta_pn, family=binomial)

sjPlot::tab_model(m_fn, m_pn, show.obs=F, show.r2=F)
  resp resp
Predictors Odds Ratios CI p Odds Ratios CI p
(Intercept) 2.02 1.00 – 4.10 0.051 1.52 0.78 – 2.94 0.218
Race [Black] 0.64 0.41 – 0.99 0.046 1.25 0.85 – 1.84 0.265
Family [Non-Intact] 0.90 0.62 – 1.33 0.609 0.57 0.40 – 0.82 0.003
DadEdu [College] 1.19 0.74 – 1.92 0.471 1.28 0.81 – 2.04 0.303
Income 1.72 1.10 – 2.73 0.019 1.22 0.84 – 1.81 0.316
Local 0.92 0.85 – 0.99 0.025 1.01 0.95 – 1.09 0.708
ASAB 1.34 1.07 – 1.67 0.010 1.22 0.99 – 1.51 0.067

3.2 One Employment variable

m2 <- nnet::multinom(Employment ~ Race+Family+DadEdu+Income+Local+ASAB, data=dta, Hess=TRUE, trace=F)
sjPlot::tab_model(m2, show.r2=FALSE, show.obs=FALSE)
  Employment
Predictors Odds Ratios CI p Response
(Intercept) 1.43 0.75 – 2.75 0.281 School
Race [Black] 1.26 0.86 – 1.85 0.243 School
Family [Non-Intact] 0.58 0.40 – 0.83 0.003 School
DadEdu [College] 1.27 0.80 – 2.02 0.305 School
Income 1.31 0.87 – 1.97 0.199 School
Local 1.01 0.95 – 1.08 0.718 School
ASAB 1.19 0.97 – 1.47 0.097 School
(Intercept) 2.07 1.05 – 4.08 0.037 Work
Race [Black] 0.64 0.42 – 0.98 0.042 Work
Family [Non-Intact] 0.87 0.60 – 1.27 0.485 Work
DadEdu [College] 1.20 0.75 – 1.92 0.456 Work
Income 1.50 0.99 – 2.27 0.054 Work
Local 0.93 0.87 – 1.00 0.057 Work
ASAB 1.36 1.10 – 1.69 0.005 Work

4 plot

ggplot(subset(a, Employment!="Idle"), 
       aes(DadEdu, Freq/cumsum(Freq), color=Employment))+
  geom_line()+
  stat_smooth(aes(DadEdu, Freq/cumsum(Freq)), 
              method = "glm", method.args = list(family=binomial))+
  geom_point()+
  facet_grid(Family ~ Race)+
  geom_hline(yintercept = 0, linetype = 3) +
  scale_y_continuous(limits=c(-1,1), breaks=seq(-1, 1, by=.5))+
  labs(x="Father's education", y="Log-odds(Inactive)")
## `geom_smooth()` using formula 'y ~ x'
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

ggplot(subset(b, Employment!="Idle"), 
       aes(ASABf2, Freq/cumsum(Freq), color=Employment))+
  geom_line()+
  stat_smooth(method = "glm", method.args = list(family=binomial))+
  geom_point()+
  facet_wrap(Incf2~Localf2, nrow=2)+
  geom_hline(yintercept = 0, linetype = 3) +
  scale_y_continuous(limits=c(-1,1), breaks=seq(-1, 1, by=.5))+
  labs(x="Father's education", y="Log-odds(Inactive)")
## `geom_smooth()` using formula 'y ~ x'
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

# The end