#
# Job search data

# olmm適用於longitudinal (or clustered) ordinal (or multinomial) respones不同類型的兩階段線性混合模型
# formula : y ~ ce(x1) + ge(x2) +re(1 + ge(w2) | id)
# ce  category-specific non-proportional odds effect 特定類別效應
# ge  global proportional odds fixed effect 全面效應
# re  隨機效應在這裡面指定

# family
  # cumulative 累加上去的機率  ex: link =”logit” (較低類別機率總和取對數)
  #                                cumulative 不可用ce 指定random effects
  # adjacent(link = "logit")取兩個相鄰類別中較低的機率的對數
  # baseline(link = "logit")參照最高類別的類別機率的對數。(baseline 只能用 ce)
                    
                    

# load packages for later use
pacman::p_load("foreign","nnet","tidyverse","vcrpart")
 
# path to data
dta <- read.csv("C:/Users/user/Desktop/tutorial/hfwork.csv", sep=",") 

# data structure
str(dta)
## 'data.frame':    18913 obs. of  9 variables:
##  $ id    : num  1.02e+14 1.02e+14 1.02e+14 1.02e+14 1.02e+14 ...
##  $ area  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ sex   : int  1 1 1 1 2 1 1 2 2 1 ...
##  $ age   : int  37 29 40 36 34 44 43 44 32 42 ...
##  $ marry : int  3 2 3 1 1 1 1 1 2 2 ...
##  $ edus  : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ edul  : int  6 8 6 6 6 6 6 6 6 7 ...
##  $ major : int  6 11 5 5 3 3 5 3 3 3 ...
##  $ hfwork: int  9 6 6 1 6 6 6 6 1 9 ...
# data cleaning and recoding
dta$hfwork <- ifelse(dta$hfwork == "0", NA, dta$hfwork)

#篩選專科(含)以上學歷的人
dta <- subset(dta, edul > 6, select = 1:9)

dta <- dta %>% 
  mutate( id = as.factor(id), 
          area = as.factor(area),
          sex = as.factor(sex),
          marry = as.factor(marry),
          edus = as.factor(edus),
          edul = as.factor(edul),
          major = as.factor(ifelse(dta$major == 0, NA,ifelse(dta$hfwork ==13, NA, dta$major))),
          hfwork = as.factor(ifelse(dta$hfwork ==0, NA, ifelse(dta$hfwork ==10, NA, dta$hfwork))))

dta$sex <- recode_factor(dta$sex, '1' = 'M', '2' = 'F' )

dta$area <- recode_factor(dta$area,
                          '0' = 'Taiwan',
                          '1' = 'Taipei',
                          '2' = 'Kaohsiung',
                          '3' = 'New Taipei',
                          '4' = 'Taichung',
                          '5' = 'Tainan',
                          '6' = 'Taoyuan' )
                          
dta$hfwork <- recode_factor(dta$hfwork,
                          '1' = 'Referral',
                          '2' = 'School',
                          '3' = 'ESC_Gov',
                          '4' = 'ESC_Priv',
                          '5' = 'Union',
                          '6' = 'Ads',
                          '7' = 'Civil_Exam' ,
                          '8' = 'Transfer',
                          '9' = 'Family')

dta$edul <- factor(dta$edul, levels = c(7, 8, 9, 10), 
                   labels = c("AD", "BS", "MA", "PhD"))

# Work search by education
with(dta, table(edul, hfwork))
##      hfwork
## edul  Referral School ESC_Gov ESC_Priv Union  Ads Civil_Exam Transfer
##   AD       879     15      60      339     4 1100        241       99
##   BS      1502     54     151     1100    13 2608        622      165
##   MA       292     20      29      256     0  531        260       61
##   PhD       43      2       4       14     0   66         17       11
##      hfwork
## edul  Family
##   AD     753
##   BS     934
##   MA     139
##   PhD     10
barplot(with(dta, table(edul, hfwork)))

barplot(with(dta, prop.table(table(edul, hfwork), 2)))

# edul*hfwork freq
data.frame(with(dta, table(edul, hfwork)))
##    edul     hfwork Freq
## 1    AD   Referral  879
## 2    BS   Referral 1502
## 3    MA   Referral  292
## 4   PhD   Referral   43
## 5    AD     School   15
## 6    BS     School   54
## 7    MA     School   20
## 8   PhD     School    2
## 9    AD    ESC_Gov   60
## 10   BS    ESC_Gov  151
## 11   MA    ESC_Gov   29
## 12  PhD    ESC_Gov    4
## 13   AD   ESC_Priv  339
## 14   BS   ESC_Priv 1100
## 15   MA   ESC_Priv  256
## 16  PhD   ESC_Priv   14
## 17   AD      Union    4
## 18   BS      Union   13
## 19   MA      Union    0
## 20  PhD      Union    0
## 21   AD        Ads 1100
## 22   BS        Ads 2608
## 23   MA        Ads  531
## 24  PhD        Ads   66
## 25   AD Civil_Exam  241
## 26   BS Civil_Exam  622
## 27   MA Civil_Exam  260
## 28  PhD Civil_Exam   17
## 29   AD   Transfer   99
## 30   BS   Transfer  165
## 31   MA   Transfer   61
## 32  PhD   Transfer   11
## 33   AD     Family  753
## 34   BS     Family  934
## 35   MA     Family  139
## 36  PhD     Family   10
# 9種找工作方式 每一種的總人數
# 2 = columns
x <- apply(with(dta, ftable(edul, hfwork)),2,sum)

dta$hfwork <- factor(dta$hfwork, levels(dta$hfwork)[order(x)])

# have a good look at it
ggplot(na.omit(dta), aes(hfwork, fill = edul)) +
 geom_bar(aes(y = 100 * ..count../sum(..count..)), position=position_dodge())+
 coord_flip() +
 labs(x = "Job search", y = "Percentage (100%)") +
 guides(fill = guide_legend(reverse = TRUE))+
 theme_bw()

#碩士比較喜歡考公職考試?

# have a good look at it
ggplot(na.omit(dta), aes(hfwork, fill = area)) +
  geom_bar(aes(y = 100 * ..count../sum(..count..)), position=position_dodge())+
  labs(x = "Job search", y = "Percentage (100%)") +
  guides(fill = guide_legend(reverse = TRUE))+
  theme_bw()

# school+union = others
dta$hfwork<-recode_factor(dta$hfwork,"School"="Others",
                                     "Union"="Others",
                                     "Transfer"="Others",
                                     "ESC_Gov"="Others")

#抽1500人來試試看
sdta<- sample(1:nrow(dta),1500)
sdta <- dta[sdta,]
str(sdta)
## 'data.frame':    1500 obs. of  9 variables:
##  $ id    : Factor w/ 248 levels "1.0201e+14","1.0203e+14",..: 208 244 176 44 146 243 126 172 229 4 ...
##  $ area  : Factor w/ 7 levels "Taiwan","Taipei",..: 4 7 2 1 1 7 1 2 6 1 ...
##  $ sex   : Factor w/ 2 levels "M","F": 1 1 1 2 2 2 2 2 2 2 ...
##  $ age   : int  60 41 45 23 27 51 27 30 45 41 ...
##  $ marry : Factor w/ 4 levels "1","2","3","4": 2 4 2 1 2 3 1 2 2 1 ...
##  $ edus  : Factor w/ 3 levels "1","2","3": 2 2 2 2 2 2 2 2 2 2 ...
##  $ edul  : Factor w/ 4 levels "AD","BS","MA",..: 1 2 3 2 2 3 2 2 3 1 ...
##  $ major : Factor w/ 13 levels "1","2","3","4",..: 3 6 4 7 3 1 7 3 7 3 ...
##  $ hfwork: Factor w/ 6 levels "Others","Civil_Exam",..: 3 5 2 6 5 2 6 5 6 5 ...
with(sdta, table(hfwork, edul))
##             edul
## hfwork        AD  BS  MA PhD
##   Others      16  48  14   2
##   Civil_Exam  41  75  36   2
##   ESC_Priv    51 134  26   1
##   Family      98 107  12   2
##   Referral   108 176  34   2
##   Ads        136 315  42   9
with(sdta, table(hfwork, area))
##             area
## hfwork       Taiwan Taipei Kaohsiung New Taipei Taichung Tainan Taoyuan
##   Others         36      6         9          9        8      8       4
##   Civil_Exam     64     12        18         15       12     14      19
##   ESC_Priv       66     17        21         45       17     18      28
##   Family         80     29        29         13       24     21      23
##   Referral      131     29        19         35       47     25      34
##   Ads           177     53        40         67       78     43      44
## model
sdta$edul<-as.numeric(sdta$edul,ordered = T) #好解釋

#一直失敗
#summary(m1 <- olmm(hfwork ~  edul+ re(1|area), na.action = na.omit, data = sdta, family = adjacent()))

summary(m2 <- olmm(hfwork ~  edul+ ce(1|area), na.action = na.omit,
                   data = sdta, family = baseline()))
## Warning in Ops.factor(area, edul): '+' not meaningful for factors

## Warning in Ops.factor(area, edul): '+' not meaningful for factors
## Warning in olmm_check_mm(X): design matrix appears to be column rank-
## deficient so dropping some coefs
## Linear Model 
## 
##  Family: baseline logit 
## Formula: hfwork ~ edul + ce(1 | area) 
##    Data: sdta 
## 
## Goodness of fit:
##       AIC      BIC    logLik
##  4914.319 4940.841 -2452.159
## (13 observations deleted due to missingness) 
## 
## Category-specific fixed effects:
##                             Estimate Std. Error  z value
## Others|Ads:(Intercept)     -1.836580   0.120383 -15.2562
## Civil_Exam|Ads:(Intercept) -1.181649   0.092117 -12.8277
## ESC_Priv|Ads:(Intercept)   -0.862014   0.081909 -10.5241
## Family|Ads:(Intercept)     -0.829529   0.080983 -10.2433
## Referral|Ads:(Intercept)   -0.450290   0.071533  -6.2948
summary(m3 <- olmm(hfwork ~  edul+ area, na.action = na.omit,
                   data = sdta, family = baseline()))
## Linear Model 
## 
##  Family: baseline logit 
## Formula: hfwork ~ edul + area 
##    Data: sdta 
## 
## Goodness of fit:
##       AIC      BIC    logLik
##  4877.985 5090.166 -2398.993
## (13 observations deleted due to missingness) 
## 
## Category-specific fixed effects:
##                                Estimate Std. Error z value
## Others|Ads:(Intercept)        -2.375206   0.396175 -5.9953
## Others|Ads:edul                0.413972   0.189143  2.1887
## Others|Ads:areaTaipei         -0.649707   0.468021 -1.3882
## Others|Ads:areaKaohsiung       0.082430   0.410508  0.2008
## Others|Ads:areaNew Taipei     -0.426405   0.410115 -1.0397
## Others|Ads:areaTaichung       -0.711817   0.414824 -1.7159
## Others|Ads:areaTainan         -0.129127   0.429434 -0.3007
## Others|Ads:areaTaoyuan        -0.838317   0.595894 -1.4068
## Civil_Exam|Ads:(Intercept)    -1.652500   0.286670 -5.7645
## Civil_Exam|Ads:edul            0.338920   0.130461  2.5979
## Civil_Exam|Ads:areaTaipei     -0.519487   0.353991 -1.4675
## Civil_Exam|Ads:areaKaohsiung   0.204491   0.319454  0.6401
## Civil_Exam|Ads:areaNew Taipei -0.488646   0.320781 -1.5233
## Civil_Exam|Ads:areaTaichung   -0.876787   0.342778 -2.5579
## Civil_Exam|Ads:areaTainan     -0.136582   0.341204 -0.4003
## Civil_Exam|Ads:areaTaoyuan     0.150655   0.316861  0.4755
## ESC_Priv|Ads:(Intercept)      -1.179882   0.288189 -4.0941
## ESC_Priv|Ads:edul              0.105950   0.134246  0.7892
## ESC_Priv|Ads:areaTaipei       -0.165874   0.314355 -0.5277
## ESC_Priv|Ads:areaKaohsiung     0.338336   0.305878  1.1061
## ESC_Priv|Ads:areaNew Taipei    0.585782   0.240986  2.4308
## ESC_Priv|Ads:areaTaichung     -0.544072   0.304117 -1.7890
## ESC_Priv|Ads:areaTainan        0.106694   0.315408  0.3383
## ESC_Priv|Ads:areaTaoyuan       0.526353   0.281874  1.8673
## Family|Ads:(Intercept)         0.178204   0.259774  0.6860
## Family|Ads:edul               -0.574665   0.131182 -4.3807
## Family|Ads:areaTaipei          0.265222   0.270914  0.9790
## Family|Ads:areaKaohsiung       0.490070   0.283797  1.7268
## Family|Ads:areaNew Taipei     -0.834999   0.336799 -2.4792
## Family|Ads:areaTaichung       -0.343007   0.269424 -1.2731
## Family|Ads:areaTainan          0.113942   0.298415  0.3818
## Family|Ads:areaTaoyuan         0.185337   0.293606  0.6312
## Referral|Ads:(Intercept)      -0.024724   0.228689 -0.1081
## Referral|Ads:edul             -0.155853   0.112178 -1.3893
## Referral|Ads:areaTaipei       -0.280585   0.258997 -1.0834
## Referral|Ads:areaKaohsiung    -0.438479   0.302493 -1.4496
## Referral|Ads:areaNew Taipei   -0.344845   0.238881 -1.4436
## Referral|Ads:areaTaichung     -0.194913   0.217944 -0.8943
## Referral|Ads:areaTainan       -0.229589   0.277020 -0.8288
## Referral|Ads:areaTaoyuan       0.054741   0.255553  0.2142