#load package

pacman::p_load(tidyverse, MASS, nnet)

#input data

dta <- read.table("youth_employment.txt", h=T)

head(dta)

##   Employment   Race     Family    DadEdu Income Local   ASAB
## 1       Work Others Non-Intact Otherwise 0.4909   6.9 -0.110
## 2     School Others Non-Intact Otherwise 0.6940   4.8  0.452
## 3       Work Others  Otherwise Otherwise 1.1000   6.5  0.967
## 4       Work Others Non-Intact   College 1.5000   3.8  1.667
## 5       Work Others Non-Intact Otherwise 0.2544   6.9  0.000
## 6     School Others Non-Intact Otherwise 0.9391   5.4  0.000

str(dta)

## 'data.frame':    978 obs. of  7 variables:
##  $ Employment: chr  "Work" "School" "Work" "Work" ...
##  $ Race      : chr  "Others" "Others" "Others" "Others" ...
##  $ Family    : chr  "Non-Intact" "Non-Intact" "Otherwise" "Non-Intact" ...
##  $ DadEdu    : chr  "Otherwise" "Otherwise" "Otherwise" "College" ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...

1 data management

dta$ID <- paste(1:978)
dta$Race <- relevel(factor(dta$Race), ref="Others")
dta$Employment <- relevel(factor(dta$Employment), ref="Idle")
dta$Family <- relevel(factor(dta$Family), ref="Otherwise")
dta$DadEdu <- relevel(factor(dta$DadEdu), ref="Otherwise")

##1 ntile的切法

dta <- dta %>% mutate(Incf = ntile(Income, 3),
                      ASABf = ntile(ASAB, 3),
                      Localf  = ntile(Local, 2),
                      Work = ifelse(dta$Employment == "Work",1,0),
                      School = ifelse(dta$Employment == "School",1,0),
                      Idle = ifelse(dta$Employment == "Idle",1,0))

dta$Incf <- factor(dta$Incf, 1:3, labels = c("Low","Middle","High"))
dta$ASABf <- factor(dta$ASABf, 1:3, labels = c("Low","Middle","High"))
dta$Localf <- factor(dta$Localf, 1:2, labels = c("Low","High"))
str(dta)

## 'data.frame':    978 obs. of  14 variables:
##  $ Employment: Factor w/ 3 levels "Idle","School",..: 3 2 3 3 3 2 3 2 2 3 ...
##  $ Race      : Factor w/ 2 levels "Others","Black": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Family    : Factor w/ 2 levels "Otherwise","Non-Intact": 2 2 1 2 2 2 1 1 2 1 ...
##  $ DadEdu    : Factor w/ 2 levels "Otherwise","College": 1 1 1 2 1 1 1 2 2 2 ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...
##  $ ID        : chr  "1" "2" "3" "4" ...
##  $ Incf      : Factor w/ 3 levels "Low","Middle",..: 1 2 3 3 1 3 3 3 3 3 ...
##  $ ASABf     : Factor w/ 3 levels "Low","Middle",..: 2 3 3 3 2 2 2 3 3 3 ...
##  $ Localf    : Factor w/ 2 levels "Low","High": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Work      : num  1 0 1 1 1 0 1 0 0 1 ...
##  $ School    : num  0 1 0 0 0 1 0 1 1 0 ...
##  $ Idle      : num  0 0 0 0 0 0 0 0 0 0 ...

aggregate(cbind(Work, School, Idle) ~ Race + Family + DadEdu, data=dta, FUN = sum)

##     Race     Family    DadEdu Work School Idle
## 1 Others  Otherwise Otherwise  141    161   84
## 2  Black  Otherwise Otherwise   26     60   28
## 3 Others Non-Intact Otherwise   54     43   47
## 4  Black Non-Intact Otherwise   27     40   39
## 5 Others  Otherwise   College   72     66   23
## 6  Black  Otherwise   College    3      8    6
## 7 Others Non-Intact   College   18     12    5
## 8  Black Non-Intact   College    2     10    3

aggregate(cbind(Idle, School, Work) ~ Incf + ASABf + Localf, data = dta, FUN = sum)

##      Incf  ASABf Localf Idle School Work
## 1     Low    Low    Low   21     33   17
## 2  Middle    Low    Low   10     19   21
## 3    High    Low    Low    6      8   10
## 4     Low Middle    Low   13     17   10
## 5  Middle Middle    Low   19     24   28
## 6    High Middle    Low   17     21   17
## 7     Low   High    Low    2     11    8
## 8  Middle   High    Low    7     20   22
## 9    High   High    Low   11     42   55
## 10    Low    Low   High   48     41   22
## 11 Middle    Low   High   14     26   14
## 12   High    Low   High    4      7    5
## 13    Low Middle   High   19     32   16
## 14 Middle Middle   High    9     24   19
## 15   High Middle   High    7     18   16
## 16    Low   High   High    5      8    3
## 17 Middle   High   High   10     19   21
## 18   High   High   High   13     30   39

##2 cut quantile切法

dta$Incf2<- factor(cut(dta$Income, 
                       breaks=c(quantile(dta$Income, probs=seq(0, 1, by=.33))),
                       labels=c("Low", "Middle","High"), ordered=T, lowest.inclued=T))
dta$Localf2<- factor(cut(dta$Local, 
                       breaks=c(quantile(dta$Local, probs=seq(0, 1, by=.5))),
                       labels=c("Low","High"), ordered=T, lowest.inclued=T))
dta$ASABf2<- factor(cut(dta$ASAB, 
                       breaks=c(quantile(dta$ASAB, probs=seq(0, 1, by=.33))),
                       labels=c("Low", "Middle","High"), ordered=T, lowest.inclued=T))
str(dta)

## 'data.frame':    978 obs. of  17 variables:
##  $ Employment: Factor w/ 3 levels "Idle","School",..: 3 2 3 3 3 2 3 2 2 3 ...
##  $ Race      : Factor w/ 2 levels "Others","Black": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Family    : Factor w/ 2 levels "Otherwise","Non-Intact": 2 2 1 2 2 2 1 1 2 1 ...
##  $ DadEdu    : Factor w/ 2 levels "Otherwise","College": 1 1 1 2 1 1 1 2 2 2 ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...
##  $ ID        : chr  "1" "2" "3" "4" ...
##  $ Incf      : Factor w/ 3 levels "Low","Middle",..: 1 2 3 3 1 3 3 3 3 3 ...
##  $ ASABf     : Factor w/ 3 levels "Low","Middle",..: 2 3 3 3 2 2 2 3 3 3 ...
##  $ Localf    : Factor w/ 2 levels "Low","High": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Work      : num  1 0 1 1 1 0 1 0 0 1 ...
##  $ School    : num  0 1 0 0 0 1 0 1 1 0 ...
##  $ Idle      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Incf2     : Ord.factor w/ 3 levels "Low"<"Middle"<..: 2 2 3 3 1 3 3 3 3 3 ...
##  $ Localf2   : Ord.factor w/ 2 levels "Low"<"High": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ASABf2    : Ord.factor w/ 3 levels "Low"<"Middle"<..: 2 3 3 NA 2 2 2 3 3 3 ...

with(dta,ftable(Race,Family, DadEdu,Employment))

##                             Employment Idle School Work
## Race   Family     DadEdu                               
## Others Otherwise  Otherwise              84    161  141
##                   College                23     66   72
##        Non-Intact Otherwise              47     43   54
##                   College                 5     12   18
## Black  Otherwise  Otherwise              28     60   26
##                   College                 6      8    3
##        Non-Intact Otherwise              39     40   27
##                   College                 3     10    2

a<-as.data.frame(with(dta,ftable(Race,Family, DadEdu,Employment)))
b <- as.data.frame(with(dta, ftable(Incf2, ASABf2, Localf2, Employment)))
with(dta, ftable(Incf2,ASABf2,Localf2, Employment))

##                       Employment Idle School Work
## Incf2  ASABf2 Localf2                            
## Low    Low    Low                  26     39   19
##               High                 42     34   19
##        Middle Low                  21     22   15
##               High                 11     26   10
##        High   Low                   3     13    8
##               High                  4      5    3
## Middle Low    Low                  13     20   24
##               High                 11     24   10
##        Middle Low                  22     28   36
##               High                  6     16   13
##        High   Low                   9     22   29
##               High                  9     19   12
## High   Low    Low                   6      9   10
##               High                  4      6    5
##        Middle Low                  18     23   16
##               High                  4     16   15
##        High   Low                  15     40   59
##               High                 11     26   27

2 plot freq

with(dta, table(Employment, Race))

##           Race
## Employment Others Black
##     Idle      159    76
##     School    282   118
##     Work      285    58

with(dta, table(Employment, DadEdu))

##           DadEdu
## Employment Otherwise College
##     Idle         198      37
##     School       304      96
##     Work         248      95

HH::likert(t(with(dta, table(Employment, Race))), 
           main="", 
           ylab="Race", 
           as.percent=TRUE)

HH::likert(t(with(dta, table(Employment, DadEdu))), 
           main="", 
           ylab="DadEdu", 
           as.percent=TRUE)

3 Model

3.1 Employment variable separate

dta_fn <- subset(dta, Employment != 'School') %>% mutate(resp = ifelse(Employment=='Work', 1, 0))
dta_pn <- subset(dta, Employment != 'Work') %>% mutate(resp = ifelse(Employment=='School', 1, 0))
m_fn <- glm(resp ~ Race+Family+DadEdu+Income+Local+ASAB, data=dta_fn, family=binomial)
m_pn <- glm(resp ~ Race+Family+DadEdu+Income+Local+ASAB, data=dta_pn, family=binomial)

sjPlot::tab_model(m_fn, m_pn, show.obs=F, show.r2=F)

	resp			resp
Predictors	Odds Ratios	CI	p	Odds Ratios	CI	p
(Intercept)	2.02	1.00 – 4.10	0.051	1.52	0.78 – 2.94	0.218
Race [Black]	0.64	0.41 – 0.99	0.046	1.25	0.85 – 1.84	0.265
Family [Non-Intact]	0.90	0.62 – 1.33	0.609	0.57	0.40 – 0.82	0.003
DadEdu [College]	1.19	0.74 – 1.92	0.471	1.28	0.81 – 2.04	0.303
Income	1.72	1.10 – 2.73	0.019	1.22	0.84 – 1.81	0.316
Local	0.92	0.85 – 0.99	0.025	1.01	0.95 – 1.09	0.708
ASAB	1.34	1.07 – 1.67	0.010	1.22	0.99 – 1.51	0.067

3.2 One Employment variable

m2 <- nnet::multinom(Employment ~ Race+Family+DadEdu+Income+Local+ASAB, data=dta, Hess=TRUE, trace=F)
sjPlot::tab_model(m2, show.r2=FALSE, show.obs=FALSE)

	Employment
Predictors	Odds Ratios	CI	p	Response
(Intercept)	1.43	0.75 – 2.75	0.281	School
Race [Black]	1.26	0.86 – 1.85	0.243	School
Family [Non-Intact]	0.58	0.40 – 0.83	0.003	School
DadEdu [College]	1.27	0.80 – 2.02	0.305	School
Income	1.31	0.87 – 1.97	0.199	School
Local	1.01	0.95 – 1.08	0.718	School
ASAB	1.19	0.97 – 1.47	0.097	School
(Intercept)	2.07	1.05 – 4.08	0.037	Work
Race [Black]	0.64	0.42 – 0.98	0.042	Work
Family [Non-Intact]	0.87	0.60 – 1.27	0.485	Work
DadEdu [College]	1.20	0.75 – 1.92	0.456	Work
Income	1.50	0.99 – 2.27	0.054	Work
Local	0.93	0.87 – 1.00	0.057	Work
ASAB	1.36	1.10 – 1.69	0.005	Work

4 plot

ggplot(subset(a, Employment!="Idle"), 
       aes(DadEdu, Freq/cumsum(Freq), color=Employment))+
  geom_line()+
  stat_smooth(aes(DadEdu, Freq/cumsum(Freq)), 
              method = "glm", method.args = list(family=binomial))+
  geom_point()+
  facet_grid(Family ~ Race)+
  geom_hline(yintercept = 0, linetype = 3) +
  scale_y_continuous(limits=c(-1,1), breaks=seq(-1, 1, by=.5))+
  labs(x="Father's education", y="Log-odds(Inactive)")

## `geom_smooth()` using formula 'y ~ x'

## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

ggplot(subset(b, Employment!="Idle"), 
       aes(ASABf2, Freq/cumsum(Freq), color=Employment))+
  geom_line()+
  stat_smooth(method = "glm", method.args = list(family=binomial))+
  geom_point()+
  facet_wrap(Incf2~Localf2, nrow=2)+
  geom_hline(yintercept = 0, linetype = 3) +
  scale_y_continuous(limits=c(-1,1), breaks=seq(-1, 1, by=.5))+
  labs(x="Father's education", y="Log-odds(Inactive)")

## `geom_smooth()` using formula 'y ~ x'

## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

# The end

1102inclass2

1 data management

2 plot freq

3 Model

3.1 Employment variable separate

3.2 One Employment variable

4 plot