A sample of 978 young men aged 20 to 22 years was extracted from the National Longitudinal Survey of Youth (NLSY). The outcome of interest is whether a youth’s reported major activity in 1985 was (1) working, (2) in school, or (3) inactive. Six additional variables were also recorded.

Column 1: Employment status: School = In school, Work = Working, Idle = Inactive
Column 2: Ethnicity, Black or Others
Column 3: Family structure, Non-intact or Otherwise
Column 4: Father’s education, College or Otherwise
Column 5: Family income in 1979 in thousands
Column 6: Local unemployment rate in 1980
Column 7: Standardized score on the Armed Services Aptitude Battery Test

1 load package

pacman::p_load(tidyverse, MASS, pscl)

2 Input data

dta2 <- read.table("C:/Users/USER/Desktop/homework/youth_employment.txt", h=T)

# first 6 lines
head(dta2)

##   Employment   Race     Family    DadEdu Income Local   ASAB
## 1       Work Others Non-Intact Otherwise 0.4909   6.9 -0.110
## 2     School Others Non-Intact Otherwise 0.6940   4.8  0.452
## 3       Work Others  Otherwise Otherwise 1.1000   6.5  0.967
## 4       Work Others Non-Intact   College 1.5000   3.8  1.667
## 5       Work Others Non-Intact Otherwise 0.2544   6.9  0.000
## 6     School Others Non-Intact Otherwise 0.9391   5.4  0.000

# check data structure
str(dta2)

## 'data.frame':    978 obs. of  7 variables:
##  $ Employment: chr  "Work" "School" "Work" "Work" ...
##  $ Race      : chr  "Others" "Others" "Others" "Others" ...
##  $ Family    : chr  "Non-Intact" "Non-Intact" "Otherwise" "Non-Intact" ...
##  $ DadEdu    : chr  "Otherwise" "Otherwise" "Otherwise" "College" ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...

3 Data management

dta2$Race <- relevel(factor(dta2$Race), ref="Others", levels=c("Black", "Others"))
dta2$Family<-relevel(factor(dta2$Family), ref="Otherwise")
dta2$DadEdu<-relevel(factor(dta2$DadEdu), ref="Otherwise")
dta2$Incf<- factor(cut(dta2$Income, 
                       breaks=c(quantile(dta2$Income, probs=seq(0, 1, by=.33))),
                       labels=c("Low", "Middle","High"), ordered=T, lowest.inclued=T))
dta2$Localf<- factor(cut(dta2$Local, 
                       breaks=c(quantile(dta2$Local, probs=seq(0, 1, by=.5))),
                       labels=c("Low","High"), ordered=T, lowest.inclued=T))
dta2$ASABf<- factor(cut(dta2$ASAB, 
                       breaks=c(quantile(dta2$ASAB, probs=seq(0, 1, by=.33))),
                       labels=c("Low", "Middle","High"), ordered=T, lowest.inclued=T))

str(dta2)

## 'data.frame':    978 obs. of  10 variables:
##  $ Employment: chr  "Work" "School" "Work" "Work" ...
##  $ Race      : Factor w/ 2 levels "Others","Black": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Family    : Factor w/ 2 levels "Otherwise","Non-Intact": 2 2 1 2 2 2 1 1 2 1 ...
##  $ DadEdu    : Factor w/ 2 levels "Otherwise","College": 1 1 1 2 1 1 1 2 2 2 ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...
##  $ Incf      : Ord.factor w/ 3 levels "Low"<"Middle"<..: 2 2 3 3 1 3 3 3 3 3 ...
##  $ Localf    : Ord.factor w/ 2 levels "Low"<"High": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ASABf     : Ord.factor w/ 3 levels "Low"<"Middle"<..: 2 3 3 NA 2 2 2 3 3 3 ...

with(dta2,ftable(Race,Family, DadEdu,Employment))

##                             Employment Idle School Work
## Race   Family     DadEdu                               
## Others Otherwise  Otherwise              84    161  141
##                   College                23     66   72
##        Non-Intact Otherwise              47     43   54
##                   College                 5     12   18
## Black  Otherwise  Otherwise              28     60   26
##                   College                 6      8    3
##        Non-Intact Otherwise              39     40   27
##                   College                 3     10    2

a<-as.data.frame(with(dta2,ftable(Race,Family, DadEdu,Employment)))

with(dta2, ftable(Incf,ASABf,Localf, Employment))

##                      Employment Idle School Work
## Incf   ASABf  Localf                            
## Low    Low    Low                 26     39   19
##               High                42     34   19
##        Middle Low                 21     22   15
##               High                11     26   10
##        High   Low                  3     13    8
##               High                 4      5    3
## Middle Low    Low                 13     20   24
##               High                11     24   10
##        Middle Low                 22     28   36
##               High                 6     16   13
##        High   Low                  9     22   29
##               High                 9     19   12
## High   Low    Low                  6      9   10
##               High                 4      6    5
##        Middle Low                 18     23   16
##               High                 4     16   15
##        High   Low                 15     40   59
##               High                11     26   27

b<-as.data.frame(with(dta2, ftable(Incf,ASABf,Localf, Employment)))

dta2_fn<-subset(dta2, Employment!="School")%>% mutate(resp=ifelse(Employment=="Work", 1,0))
dta2_fp<-subset(dta2, Employment!="Work") %>% mutate(resp=ifelse(Employment=="School",1,0))
m_fn<-glm(resp~Race+Family+DadEdu+Income+Local+ASAB, data=dta2_fn, family=binomial)
m_fp<-glm(resp~Race+Family+DadEdu+Income+Local+ASAB, data=dta2_fp, family=binomial)
sjPlot::tab_model(m_fn, m_fp, show.obs = F, show.r2 = F)

	resp			resp
Predictors	Odds Ratios	CI	p	Odds Ratios	CI	p
(Intercept)	2.02	1.00 – 4.10	0.051	1.52	0.78 – 2.94	0.218
Race [Black]	0.64	0.41 – 0.99	0.046	1.25	0.85 – 1.84	0.265
Family [Non-Intact]	0.90	0.62 – 1.33	0.609	0.57	0.40 – 0.82	0.003
DadEdu [College]	1.19	0.74 – 1.92	0.471	1.28	0.81 – 2.04	0.303
Income	1.72	1.10 – 2.73	0.019	1.22	0.84 – 1.81	0.316
Local	0.92	0.85 – 0.99	0.025	1.01	0.95 – 1.09	0.708
ASAB	1.34	1.07 – 1.67	0.010	1.22	0.99 – 1.51	0.067

4 plot

ggplot(subset(a, Employment!="Idle"), aes(DadEdu, Freq/sum(Freq), color=Employment))+
  geom_line()+
  stat_smooth(method = "glm", method.args = list(family=binomial))+
  geom_point()+
  facet_grid(Family~Race)+
  scale_y_continuous(seq(-1.5, 1,by=.5))+
  labs(x="Father's education", y="Log-odds(Inactive)")

## `geom_smooth()` using formula 'y ~ x'

## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

ggplot(subset(b, Employment!="Idle"), aes(ASABf, Freq/cumsum(Freq), color=Employment))+
  geom_line()+
  stat_smooth(method = "glm", method.args = list(family=binomial))+
  geom_point()+
  facet_wrap(Incf~Localf, nrow=2)+
  scale_y_continuous(seq(-1.5, 1,by=.5))+
  labs(x="Father's education", y="Log-odds(Inactive)")

## `geom_smooth()` using formula 'y ~ x'

## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

summary(m0 <- nnet::multinom(Employment ~ ., data = dta2))

## # weights:  39 (24 variable)
## initial  value 1046.977511 
## iter  10 value 995.801470
## iter  20 value 989.576609
## final  value 989.321874 
## converged

## Call:
## nnet::multinom(formula = Employment ~ ., data = dta2)
## 
## Coefficients:
##        (Intercept)  RaceBlack FamilyNon-Intact DadEduCollege      Income
## School   0.7018017  0.2391504      -0.54708859     0.2263025 -0.07437054
## Work     0.7184842 -0.3885990      -0.04838905     0.1326759  0.22198338
##               Local        ASAB    Incf.L      Incf.Q    Localf.L   ASABf.L
## School -0.004238422 -0.01547256 0.1184440 -0.09025778  0.11135278 0.4116813
## Work   -0.067554926 -0.08513663 0.3412993 -0.29255812 -0.02332271 0.5843150
##           ASABf.Q
## School 0.04306545
## Work   0.12077936
## 
## Std. Errors:
##        (Intercept) RaceBlack FamilyNon-Intact DadEduCollege    Income
## School   0.5384768 0.1997812        0.1888134     0.2396764 0.4137664
## Work     0.5635782 0.2230273        0.1957335     0.2457077 0.4089509
##             Local      ASAB    Incf.L    Incf.Q  Localf.L   ASABf.L   ASABf.Q
## School 0.05398749 0.2637922 0.3230540 0.1537179 0.1882321 0.4201313 0.1469578
## Work   0.05915372 0.2756148 0.3261682 0.1578611 0.2015597 0.4372910 0.1540811
## 
## Residual Deviance: 1978.644 
## AIC: 2026.644

1102_In class exercise 2

Pei-Yun,Li

2020-11-08

1 load package

2 Input data

3 Data management

4 plot

5 The end