1 Data Management

Load data

pacman::p_load(tidyverse, MASS, nnet)
dta <- read.table("C:/Users/ASUS/Desktop/data/youth_employment.txt", h=T)
head(dta)
##   Employment   Race     Family    DadEdu Income Local   ASAB
## 1       Work Others Non-Intact Otherwise 0.4909   6.9 -0.110
## 2     School Others Non-Intact Otherwise 0.6940   4.8  0.452
## 3       Work Others  Otherwise Otherwise 1.1000   6.5  0.967
## 4       Work Others Non-Intact   College 1.5000   3.8  1.667
## 5       Work Others Non-Intact Otherwise 0.2544   6.9  0.000
## 6     School Others Non-Intact Otherwise 0.9391   5.4  0.000
str(dta)
## 'data.frame':    978 obs. of  7 variables:
##  $ Employment: chr  "Work" "School" "Work" "Work" ...
##  $ Race      : chr  "Others" "Others" "Others" "Others" ...
##  $ Family    : chr  "Non-Intact" "Non-Intact" "Otherwise" "Non-Intact" ...
##  $ DadEdu    : chr  "Otherwise" "Otherwise" "Otherwise" "College" ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...

2 Analysis & Results

dta$ID <- paste(1:978)
dta$Employment <- relevel(factor(dta$Employment), ref="Idle")
dta$Race <- relevel(factor(dta$Race), ref="Others", levels=c("Black", "Others"))
dta$Family <- relevel(factor(dta$Family), ref="Otherwise")
dta$DadEdu <- relevel(factor(dta$DadEdu), ref="Otherwise")
dta$Race <- relevel(factor(dta$Race), ref="Others")
dta$Family<-relevel(factor(dta$Family), ref="Non-Intact")
dta$DadEdu<-relevel(factor(dta$DadEdu), ref="Otherwise")
dta <- dta %>% mutate(Incf = ntile(Income, 3),Localf  = ntile(Local, 2),
                      ASABf = ntile(ASAB, 3))
dta$Incf <- factor(dta$Incf, 1:3, labels = c("Low","Middle","High"))
dta$Localf <- factor(dta$Localf, 1:2, labels = c("Low","High"))
dta$ASABf <- factor(dta$ASABf, 1:3, labels = c("Low","Middle","High"))

str(dta)
## 'data.frame':    978 obs. of  11 variables:
##  $ Employment: Factor w/ 3 levels "Idle","School",..: 3 2 3 3 3 2 3 2 2 3 ...
##  $ Race      : Factor w/ 2 levels "Others","Black": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Family    : Factor w/ 2 levels "Non-Intact","Otherwise": 1 1 2 1 1 1 2 2 1 2 ...
##  $ DadEdu    : Factor w/ 2 levels "Otherwise","College": 1 1 1 2 1 1 1 2 2 2 ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...
##  $ ID        : chr  "1" "2" "3" "4" ...
##  $ Incf      : Factor w/ 3 levels "Low","Middle",..: 1 2 3 3 1 3 3 3 3 3 ...
##  $ Localf    : Factor w/ 2 levels "Low","High": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ASABf     : Factor w/ 3 levels "Low","Middle",..: 2 3 3 3 2 2 2 3 3 3 ...
with(dta, ftable(Race, Family, DadEdu, Employment))
##                             Employment Idle School Work
## Race   Family     DadEdu                               
## Others Non-Intact Otherwise              47     43   54
##                   College                 5     12   18
##        Otherwise  Otherwise              84    161  141
##                   College                23     66   72
## Black  Non-Intact Otherwise              39     40   27
##                   College                 3     10    2
##        Otherwise  Otherwise              28     60   26
##                   College                 6      8    3
a <- as.data.frame(with(dta, ftable(Race, Family, DadEdu, Employment)))
b <- as.data.frame(with(dta, ftable(Incf, ASABf, Localf, Employment)))
dta_fn <- subset(dta, Employment!="School")%>%
  mutate(resp = ifelse(Employment == "Work", 1,0))
dta_pn <- subset(dta, Employment!="Work") %>% 
  mutate(resp = ifelse(Employment == "School",1,0))

m_fn <- glm(resp ~ Race+Family+DadEdu+Income+Local+ASAB, 
            data=dta_fn, family=binomial)
m_pn <- glm(resp ~ Race+Family+DadEdu+Income+Local+ASAB, 
            data=dta_pn, family=binomial)

sjPlot::tab_model(m_fn, m_pn, show.obs = F, show.r2 = F)
  resp resp
Predictors Odds Ratios CI p Odds Ratios CI p
(Intercept) 1.82 0.89 – 3.76 0.101 0.87 0.43 – 1.73 0.685
Race [Black] 0.64 0.41 – 0.99 0.046 1.25 0.85 – 1.84 0.265
Family [Otherwise] 1.11 0.75 – 1.62 0.609 1.75 1.22 – 2.52 0.003
DadEdu [College] 1.19 0.74 – 1.92 0.471 1.28 0.81 – 2.04 0.303
Income 1.72 1.10 – 2.73 0.019 1.22 0.84 – 1.81 0.316
Local 0.92 0.85 – 0.99 0.025 1.01 0.95 – 1.09 0.708
ASAB 1.34 1.07 – 1.67 0.010 1.22 0.99 – 1.51 0.067
ggplot(subset(a, Employment!="Idle"), 
       aes(DadEdu, Freq/cumsum(Freq), color=Employment))+
  geom_line()+
  stat_smooth(aes(DadEdu, Freq/cumsum(Freq)), 
              method = "glm", method.args = list(family=binomial))+
  geom_point()+
  facet_grid(Family ~ Race)+
  geom_hline(yintercept = 0, linetype = 3) +
  scale_y_continuous(limits=c(-1,1), breaks=seq(-1, 1, by=.5))+
  labs(x="Father's education", y="Log-odds(Inactive)")
## `geom_smooth()` using formula 'y ~ x'
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

``` The end