1 Introduction

A sample of 978 young men aged 20 to 22 years was extracted from the National Longitudinal Survey of Youth (NLSY). The outcome of interest is whether a youth’s reported major activity in 1985 was (1) working, (2) in school, or (3) inactive. Six additional variables were also recorded.

Source: Powers, D.A., & Xie, Y. (2000). Statistical Methods for Categorical Data Analysis. p. 232.

Column 1: Employment status: School = In school, Work = Working, Idle = Inactive
Column 2: Ethnicity, Black or Others
Column 3: Family structure, Non-intact or Otherwise
Column 4: Father’s education, College or Otherwise
Column 5: Family income in 1979 in thousands
Column 6: Local unemployment rate in 1980
Column 7: Standardized score on the Armed Services Aptitude Battery Test

2 Data management

#install apckages
pacman::p_load(tidyverse, MASS, nnet)
#input data
dta <- read.table("C:/Users/Ching-Fang Wu/Documents/data/youth_employment.txt", h=T)
#show first 6 lines
head(dta)
##   Employment   Race     Family    DadEdu Income Local   ASAB
## 1       Work Others Non-Intact Otherwise 0.4909   6.9 -0.110
## 2     School Others Non-Intact Otherwise 0.6940   4.8  0.452
## 3       Work Others  Otherwise Otherwise 1.1000   6.5  0.967
## 4       Work Others Non-Intact   College 1.5000   3.8  1.667
## 5       Work Others Non-Intact Otherwise 0.2544   6.9  0.000
## 6     School Others Non-Intact Otherwise 0.9391   5.4  0.000
#show structure of data
str(dta)
## 'data.frame':    978 obs. of  7 variables:
##  $ Employment: chr  "Work" "School" "Work" "Work" ...
##  $ Race      : chr  "Others" "Others" "Others" "Others" ...
##  $ Family    : chr  "Non-Intact" "Non-Intact" "Otherwise" "Non-Intact" ...
##  $ DadEdu    : chr  "Otherwise" "Otherwise" "Otherwise" "College" ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...

類別變數有四個:Employment、Race、Family、DadEdu 數值變數有三個:Income、Local、ASAB

summary(dta)
##   Employment            Race              Family             DadEdu         
##  Length:978         Length:978         Length:978         Length:978        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      Income           Local             ASAB        
##  Min.   :0.0000   Min.   : 2.000   Min.   :-2.0670  
##  1st Qu.:0.3677   1st Qu.: 5.400   1st Qu.:-0.9427  
##  Median :0.6940   Median : 6.900   Median :-0.1105  
##  Mean   :0.7942   Mean   : 6.994   Mean   :-0.1250  
##  3rd Qu.:1.0500   3rd Qu.: 8.100   3rd Qu.: 0.6390  
##  Max.   :4.3302   Max.   :16.600   Max.   : 1.8280
#relevel可重新排序因子變數的level,將指定的level放在第一位置
dta$Employment <- relevel(factor(dta$Employment), ref="Idle")
dta$Race <- relevel(factor(dta$Race), ref="Black") 
dta$Family<-relevel(factor(dta$Family), ref="Non-Intact")
dta$DadEdu<-relevel(factor(dta$DadEdu), ref="College")
#用cut將資料切分數段,切分點為quantile,再用label給新標籤
dta$Incf<- factor(cut(dta$Income,breaks=c(quantile(dta$Income, probs=seq(0, 1, by=.33))),labels=c("Low", "Middle","High"), ordered=T, lowest.inclued=T)) #將income切分三個等級,分別為"Low", "Middle","High",並新增類別變數Incf

dta$Localf<- factor(cut(dta$Local,breaks=c(quantile(dta$Local, probs=seq(0, 1, by=.5))),labels=c("Low","High"), ordered=T, lowest.inclued=T)) #失業率切分為"低""高",並新增類別變數Localf

dta$ASABf<- factor(cut(dta$ASAB,breaks=c(quantile(dta$ASAB, probs=seq(0, 1, by=.33))),labels=c("Low", "Middle","High"), ordered=T, lowest.inclued=T)) #測驗分數切分三個等級"低""中""高",並新增類別變數ASABf
str(dta)
## 'data.frame':    978 obs. of  10 variables:
##  $ Employment: Factor w/ 3 levels "Idle","School",..: 3 2 3 3 3 2 3 2 2 3 ...
##  $ Race      : Factor w/ 2 levels "Black","Others": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Family    : Factor w/ 2 levels "Non-Intact","Otherwise": 1 1 2 1 1 1 2 2 1 2 ...
##  $ DadEdu    : Factor w/ 2 levels "College","Otherwise": 2 2 2 1 2 2 2 1 1 1 ...
##  $ Income    : num  0.491 0.694 1.1 1.5 0.254 ...
##  $ Local     : num  6.9 4.8 6.5 3.8 6.9 5.4 4.2 6.2 6.2 6.9 ...
##  $ ASAB      : num  -0.11 0.452 0.967 1.667 0 ...
##  $ Incf      : Ord.factor w/ 3 levels "Low"<"Middle"<..: 2 2 3 3 1 3 3 3 3 3 ...
##  $ Localf    : Ord.factor w/ 2 levels "Low"<"High": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ASABf     : Ord.factor w/ 3 levels "Low"<"Middle"<..: 2 3 3 NA 2 2 2 3 3 3 ...
#用ftable建立列聯表
with(dta,ftable(Race,Family, DadEdu,Employment)) #
##                             Employment Idle School Work
## Race   Family     DadEdu                               
## Black  Non-Intact College                 3     10    2
##                   Otherwise              39     40   27
##        Otherwise  College                 6      8    3
##                   Otherwise              28     60   26
## Others Non-Intact College                 5     12   18
##                   Otherwise              47     43   54
##        Otherwise  College                23     66   72
##                   Otherwise              84    161  141

1.黑人、單親家庭、父母教育程度大學以上, (1)且不活動(啃老族)的有3人、(2)且在學校(繼續升學)的有10人、(3)且有工作(經濟獨立)者2人。

with(dta, ftable(Incf,ASABf,Localf, Employment))
##                      Employment Idle School Work
## Incf   ASABf  Localf                            
## Low    Low    Low                 26     39   19
##               High                42     34   19
##        Middle Low                 21     22   15
##               High                11     26   10
##        High   Low                  3     13    8
##               High                 4      5    3
## Middle Low    Low                 13     20   24
##               High                11     24   10
##        Middle Low                 22     28   36
##               High                 6     16   13
##        High   Low                  9     22   29
##               High                 9     19   12
## High   Low    Low                  6      9   10
##               High                 4      6    5
##        Middle Low                 18     23   16
##               High                 4     16   15
##        High   Low                 15     40   59
##               High                11     26   27

1.低所得、低測驗分數、低失業率地區, (1)且不活動(啃老族)的有26人、(2)且在學校(繼續升學)的有39人、(3)且有工作(經濟獨立)者19人。

a<-as.data.frame(with(dta,ftable(Race,Family, DadEdu,Employment)))

b<-as.data.frame(with(dta, ftable(Incf,ASABf,Localf, Employment)))

3

summary(m0 <- multinom(formula= Employment ~ ., data = dta))
## # weights:  39 (24 variable)
## initial  value 1046.977511 
## iter  10 value 995.971270
## iter  20 value 989.501130
## final  value 989.321874 
## converged
## Call:
## multinom(formula = Employment ~ ., data = dta)
## 
## Coefficients:
##        (Intercept) RaceOthers FamilyOtherwise DadEduOtherwise      Income
## School   0.6202230 -0.2391485      0.54708943      -0.2263038 -0.07441741
## Work     0.4142315  0.3886016      0.04839001      -0.1326755  0.22193394
##               Local        ASAB    Incf.L      Incf.Q   Localf.L   ASABf.L
## School -0.004241166 -0.01546477 0.1184754 -0.09025442  0.1113601 0.4116753
## Work   -0.067558135 -0.08512935 0.3413361 -0.29255252 -0.0233147 0.5843044
##           ASABf.Q
## School 0.04306771
## Work   0.12078234
## 
## Std. Errors:
##        (Intercept) RaceOthers FamilyOtherwise DadEduOtherwise    Income
## School   0.6063729  0.1997812       0.1888134       0.2396764 0.4137645
## Work     0.6387940  0.2230273       0.1957335       0.2457078 0.4089487
##             Local      ASAB    Incf.L    Incf.Q  Localf.L   ASABf.L   ASABf.Q
## School 0.05398747 0.2637922 0.3230534 0.1537180 0.1882321 0.4201313 0.1469578
## Work   0.05915372 0.2756148 0.3261676 0.1578612 0.2015597 0.4372910 0.1540812
## 
## Residual Deviance: 1978.644 
## AIC: 2026.644

4 The end