#
# Job search data
# olmm適用於longitudinal (or clustered) ordinal (or multinomial) respones不同類型的兩階段線性混合模型
# formula : y ~ ce(x1) + ge(x2) +re(1 + ge(w2) | id)
# ce category-specific non-proportional odds effect 特定類別效應
# ge global proportional odds fixed effect 全面效應
# re 隨機效應在這裡面指定
# family
# cumulative 累加上去的機率 ex: link =”logit” (較低類別機率總和取對數)
# cumulative 不可用ce 指定random effects
# adjacent(link = "logit")取兩個相鄰類別中較低的機率的對數
# baseline(link = "logit")參照最高類別的類別機率的對數。(baseline 只能用 ce)
# load packages for later use
pacman::p_load("foreign","nnet","tidyverse","vcrpart")
# path to data
dta <- read.csv("C:/Users/user/Desktop/tutorial/hfwork.csv", sep=",")
# data structure
str(dta)
## 'data.frame': 18913 obs. of 9 variables:
## $ id : num 1.02e+14 1.02e+14 1.02e+14 1.02e+14 1.02e+14 ...
## $ area : int 0 0 0 0 0 0 0 0 0 0 ...
## $ sex : int 1 1 1 1 2 1 1 2 2 1 ...
## $ age : int 37 29 40 36 34 44 43 44 32 42 ...
## $ marry : int 3 2 3 1 1 1 1 1 2 2 ...
## $ edus : int 2 2 2 2 2 2 2 2 2 2 ...
## $ edul : int 6 8 6 6 6 6 6 6 6 7 ...
## $ major : int 6 11 5 5 3 3 5 3 3 3 ...
## $ hfwork: int 9 6 6 1 6 6 6 6 1 9 ...
# data cleaning and recoding
dta$hfwork <- ifelse(dta$hfwork == "0", NA, dta$hfwork)
#篩選專科(含)以上學歷的人
dta <- subset(dta, edul > 6, select = 1:9)
dta <- dta %>%
mutate( id = as.factor(id),
area = as.factor(area),
sex = as.factor(sex),
marry = as.factor(marry),
edus = as.factor(edus),
edul = as.factor(edul),
major = as.factor(ifelse(dta$major == 0, NA,ifelse(dta$hfwork ==13, NA, dta$major))),
hfwork = as.factor(ifelse(dta$hfwork ==0, NA, ifelse(dta$hfwork ==10, NA, dta$hfwork))))
dta$sex <- recode_factor(dta$sex, '1' = 'M', '2' = 'F' )
dta$area <- recode_factor(dta$area,
'0' = 'Taiwan',
'1' = 'Taipei',
'2' = 'Kaohsiung',
'3' = 'New Taipei',
'4' = 'Taichung',
'5' = 'Tainan',
'6' = 'Taoyuan' )
dta$hfwork <- recode_factor(dta$hfwork,
'1' = 'Referral',
'2' = 'School',
'3' = 'ESC_Gov',
'4' = 'ESC_Priv',
'5' = 'Union',
'6' = 'Ads',
'7' = 'Civil_Exam' ,
'8' = 'Transfer',
'9' = 'Family')
dta$edul <- factor(dta$edul, levels = c(7, 8, 9, 10),
labels = c("AD", "BS", "MA", "PhD"))
# Work search by education
with(dta, table(edul, hfwork))
## hfwork
## edul Referral School ESC_Gov ESC_Priv Union Ads Civil_Exam Transfer
## AD 879 15 60 339 4 1100 241 99
## BS 1502 54 151 1100 13 2608 622 165
## MA 292 20 29 256 0 531 260 61
## PhD 43 2 4 14 0 66 17 11
## hfwork
## edul Family
## AD 753
## BS 934
## MA 139
## PhD 10
barplot(with(dta, table(edul, hfwork)))

barplot(with(dta, prop.table(table(edul, hfwork), 2)))

# edul*hfwork freq
data.frame(with(dta, table(edul, hfwork)))
## edul hfwork Freq
## 1 AD Referral 879
## 2 BS Referral 1502
## 3 MA Referral 292
## 4 PhD Referral 43
## 5 AD School 15
## 6 BS School 54
## 7 MA School 20
## 8 PhD School 2
## 9 AD ESC_Gov 60
## 10 BS ESC_Gov 151
## 11 MA ESC_Gov 29
## 12 PhD ESC_Gov 4
## 13 AD ESC_Priv 339
## 14 BS ESC_Priv 1100
## 15 MA ESC_Priv 256
## 16 PhD ESC_Priv 14
## 17 AD Union 4
## 18 BS Union 13
## 19 MA Union 0
## 20 PhD Union 0
## 21 AD Ads 1100
## 22 BS Ads 2608
## 23 MA Ads 531
## 24 PhD Ads 66
## 25 AD Civil_Exam 241
## 26 BS Civil_Exam 622
## 27 MA Civil_Exam 260
## 28 PhD Civil_Exam 17
## 29 AD Transfer 99
## 30 BS Transfer 165
## 31 MA Transfer 61
## 32 PhD Transfer 11
## 33 AD Family 753
## 34 BS Family 934
## 35 MA Family 139
## 36 PhD Family 10
# 9種找工作方式 每一種的總人數
# 2 = columns
x <- apply(with(dta, ftable(edul, hfwork)),2,sum)
dta$hfwork <- factor(dta$hfwork, levels(dta$hfwork)[order(x)])
# have a good look at it
ggplot(na.omit(dta), aes(hfwork, fill = edul)) +
geom_bar(aes(y = 100 * ..count../sum(..count..)), position=position_dodge())+
coord_flip() +
labs(x = "Job search", y = "Percentage (100%)") +
guides(fill = guide_legend(reverse = TRUE))+
theme_bw()

#碩士比較喜歡考公職考試?
# have a good look at it
ggplot(na.omit(dta), aes(hfwork, fill = area)) +
geom_bar(aes(y = 100 * ..count../sum(..count..)), position=position_dodge())+
labs(x = "Job search", y = "Percentage (100%)") +
guides(fill = guide_legend(reverse = TRUE))+
theme_bw()

# school+union = others
dta$hfwork<-recode_factor(dta$hfwork,"School"="Others",
"Union"="Others",
"Transfer"="Others",
"ESC_Gov"="Others")
#抽1500人來試試看
sdta<- sample(1:nrow(dta),1500)
sdta <- dta[sdta,]
str(sdta)
## 'data.frame': 1500 obs. of 9 variables:
## $ id : Factor w/ 248 levels "1.0201e+14","1.0203e+14",..: 208 244 176 44 146 243 126 172 229 4 ...
## $ area : Factor w/ 7 levels "Taiwan","Taipei",..: 4 7 2 1 1 7 1 2 6 1 ...
## $ sex : Factor w/ 2 levels "M","F": 1 1 1 2 2 2 2 2 2 2 ...
## $ age : int 60 41 45 23 27 51 27 30 45 41 ...
## $ marry : Factor w/ 4 levels "1","2","3","4": 2 4 2 1 2 3 1 2 2 1 ...
## $ edus : Factor w/ 3 levels "1","2","3": 2 2 2 2 2 2 2 2 2 2 ...
## $ edul : Factor w/ 4 levels "AD","BS","MA",..: 1 2 3 2 2 3 2 2 3 1 ...
## $ major : Factor w/ 13 levels "1","2","3","4",..: 3 6 4 7 3 1 7 3 7 3 ...
## $ hfwork: Factor w/ 6 levels "Others","Civil_Exam",..: 3 5 2 6 5 2 6 5 6 5 ...
with(sdta, table(hfwork, edul))
## edul
## hfwork AD BS MA PhD
## Others 16 48 14 2
## Civil_Exam 41 75 36 2
## ESC_Priv 51 134 26 1
## Family 98 107 12 2
## Referral 108 176 34 2
## Ads 136 315 42 9
with(sdta, table(hfwork, area))
## area
## hfwork Taiwan Taipei Kaohsiung New Taipei Taichung Tainan Taoyuan
## Others 36 6 9 9 8 8 4
## Civil_Exam 64 12 18 15 12 14 19
## ESC_Priv 66 17 21 45 17 18 28
## Family 80 29 29 13 24 21 23
## Referral 131 29 19 35 47 25 34
## Ads 177 53 40 67 78 43 44
## model
sdta$edul<-as.numeric(sdta$edul,ordered = T) #好解釋
#一直失敗
#summary(m1 <- olmm(hfwork ~ edul+ re(1|area), na.action = na.omit, data = sdta, family = adjacent()))
summary(m2 <- olmm(hfwork ~ edul+ ce(1|area), na.action = na.omit,
data = sdta, family = baseline()))
## Warning in Ops.factor(area, edul): '+' not meaningful for factors
## Warning in Ops.factor(area, edul): '+' not meaningful for factors
## Warning in olmm_check_mm(X): design matrix appears to be column rank-
## deficient so dropping some coefs
## Linear Model
##
## Family: baseline logit
## Formula: hfwork ~ edul + ce(1 | area)
## Data: sdta
##
## Goodness of fit:
## AIC BIC logLik
## 4914.319 4940.841 -2452.159
## (13 observations deleted due to missingness)
##
## Category-specific fixed effects:
## Estimate Std. Error z value
## Others|Ads:(Intercept) -1.836580 0.120383 -15.2562
## Civil_Exam|Ads:(Intercept) -1.181649 0.092117 -12.8277
## ESC_Priv|Ads:(Intercept) -0.862014 0.081909 -10.5241
## Family|Ads:(Intercept) -0.829529 0.080983 -10.2433
## Referral|Ads:(Intercept) -0.450290 0.071533 -6.2948
summary(m3 <- olmm(hfwork ~ edul+ area, na.action = na.omit,
data = sdta, family = baseline()))
## Linear Model
##
## Family: baseline logit
## Formula: hfwork ~ edul + area
## Data: sdta
##
## Goodness of fit:
## AIC BIC logLik
## 4877.985 5090.166 -2398.993
## (13 observations deleted due to missingness)
##
## Category-specific fixed effects:
## Estimate Std. Error z value
## Others|Ads:(Intercept) -2.375206 0.396175 -5.9953
## Others|Ads:edul 0.413972 0.189143 2.1887
## Others|Ads:areaTaipei -0.649707 0.468021 -1.3882
## Others|Ads:areaKaohsiung 0.082430 0.410508 0.2008
## Others|Ads:areaNew Taipei -0.426405 0.410115 -1.0397
## Others|Ads:areaTaichung -0.711817 0.414824 -1.7159
## Others|Ads:areaTainan -0.129127 0.429434 -0.3007
## Others|Ads:areaTaoyuan -0.838317 0.595894 -1.4068
## Civil_Exam|Ads:(Intercept) -1.652500 0.286670 -5.7645
## Civil_Exam|Ads:edul 0.338920 0.130461 2.5979
## Civil_Exam|Ads:areaTaipei -0.519487 0.353991 -1.4675
## Civil_Exam|Ads:areaKaohsiung 0.204491 0.319454 0.6401
## Civil_Exam|Ads:areaNew Taipei -0.488646 0.320781 -1.5233
## Civil_Exam|Ads:areaTaichung -0.876787 0.342778 -2.5579
## Civil_Exam|Ads:areaTainan -0.136582 0.341204 -0.4003
## Civil_Exam|Ads:areaTaoyuan 0.150655 0.316861 0.4755
## ESC_Priv|Ads:(Intercept) -1.179882 0.288189 -4.0941
## ESC_Priv|Ads:edul 0.105950 0.134246 0.7892
## ESC_Priv|Ads:areaTaipei -0.165874 0.314355 -0.5277
## ESC_Priv|Ads:areaKaohsiung 0.338336 0.305878 1.1061
## ESC_Priv|Ads:areaNew Taipei 0.585782 0.240986 2.4308
## ESC_Priv|Ads:areaTaichung -0.544072 0.304117 -1.7890
## ESC_Priv|Ads:areaTainan 0.106694 0.315408 0.3383
## ESC_Priv|Ads:areaTaoyuan 0.526353 0.281874 1.8673
## Family|Ads:(Intercept) 0.178204 0.259774 0.6860
## Family|Ads:edul -0.574665 0.131182 -4.3807
## Family|Ads:areaTaipei 0.265222 0.270914 0.9790
## Family|Ads:areaKaohsiung 0.490070 0.283797 1.7268
## Family|Ads:areaNew Taipei -0.834999 0.336799 -2.4792
## Family|Ads:areaTaichung -0.343007 0.269424 -1.2731
## Family|Ads:areaTainan 0.113942 0.298415 0.3818
## Family|Ads:areaTaoyuan 0.185337 0.293606 0.6312
## Referral|Ads:(Intercept) -0.024724 0.228689 -0.1081
## Referral|Ads:edul -0.155853 0.112178 -1.3893
## Referral|Ads:areaTaipei -0.280585 0.258997 -1.0834
## Referral|Ads:areaKaohsiung -0.438479 0.302493 -1.4496
## Referral|Ads:areaNew Taipei -0.344845 0.238881 -1.4436
## Referral|Ads:areaTaichung -0.194913 0.217944 -0.8943
## Referral|Ads:areaTainan -0.229589 0.277020 -0.8288
## Referral|Ads:areaTaoyuan 0.054741 0.255553 0.2142