1 input data

#
dta <- read.table("married.txt", h=T)

#顯示前六筆，看資料結構
head(dta)

##       教育 性別 婚姻 年齡
## 1 國小以下   女 未婚 32.5
## 2     專科   男 有偶 57.5
## 3     國中   男 有偶 42.5
## 4 國小以下   男 有偶 42.5
## 5     高中   女 喪偶 62.5
## 6     高中   女 有偶 52.5

str(dta)

## 'data.frame':    10000 obs. of  4 variables:
##  $ 教育: chr  "國小以下" "專科" "國中" "國小以下" ...
##  $ 性別: chr  "女" "男" "男" "男" ...
##  $ 婚姻: chr  "未婚" "有偶" "有偶" "有偶" ...
##  $ 年齡: num  32.5 57.5 42.5 42.5 62.5 52.5 32.5 37.5 42.5 62.5 ...

summary(dta)

##      教育               性別               婚姻                年齡      
##  Length:10000       Length:10000       Length:10000       Min.   :32.50  
##  Class :character   Class :character   Class :character   1st Qu.:37.50  
##  Mode  :character   Mode  :character   Mode  :character   Median :47.50  
##                                                           Mean   :46.62  
##                                                           3rd Qu.:52.50  
##                                                           Max.   :62.50

2 data management

#重排教育各水準
dta$教育 <- factor(dta$教育, levels = c('國小以下', '國中', '高中',
                                              '專科', '大學以上'))

#重排婚姻，由多到少
dta$婚姻 <- factor(dta$婚姻, levels = c('有偶', '未婚', '離婚', '喪偶'))

#將年齡換成年次
dta$年次 <- (102-dta$年齡)

#將婚姻狀況分為未婚與結過婚
dta$結過婚 <- ifelse(dta$婚姻 == '未婚', 0, 1)

#計算每個變項組別各百分比 
apply(dta, 2, function(x) table(x)/dim(dta)[1])

## $教育
## x
## 大學以上     高中 國小以下     國中     專科 
##   0.1070   0.3297   0.2406   0.1360   0.1867 
## 
## $性別
## x
##     女     男 
## 0.5073 0.4927 
## 
## $婚姻
## x
##   未婚   有偶   喪偶   離婚 
## 0.1966 0.6550 0.0341 0.1143 
## 
## $年齡
## x
##   32.5   37.5   42.5   47.5   52.5   57.5   62.5 
## 0.1619 0.1516 0.1435 0.1434 0.1558 0.1306 0.1132 
## 
## $年次
## x
##   39.5   44.5   49.5   54.5   59.5   64.5   69.5 
## 0.1132 0.1306 0.1558 0.1434 0.1435 0.1516 0.1619 
## 
## $結過婚
## x
##      0      1 
## 0.1966 0.8034

3 table

#單一變項與結過婚與否的列聯表
ftable(dta, row.vars = '教育', col.vars = '結過婚')

##          結過婚    0    1
## 教育                     
## 國小以下         726 1680
## 國中             273 1087
## 高中             584 2713
## 專科             311 1556
## 大學以上          72  998

ftable(dta, row.vars = '性別', col.vars = '結過婚')

##      結過婚    0    1
## 性別                 
## 女           894 4179
## 男          1072 3855

ftable(dta, row.vars = '年次', col.vars = '結過婚')

##      結過婚    0    1
## 年次                 
## 39.5          64 1068
## 44.5         105 1201
## 49.5         136 1422
## 54.5         201 1233
## 59.5         263 1172
## 64.5         422 1094
## 69.5         775  844

#兩個變項與結過婚與否的列聯表
ftable(dta, row.vars = c('教育', '性別'), col.vars = '結過婚')

##               結過婚    0    1
## 教育     性別                 
## 國小以下 女           399  774
##          男           327  906
## 國中     女           155  524
##          男           118  563
## 高中     女           235 1472
##          男           349 1241
## 專科     女            85  746
##          男           226  810
## 大學以上 女            20  663
##          男            52  335

ftable(dta, row.vars = c('教育', '年次'), col.vars = '結過婚')

##               結過婚   0   1
## 教育     年次               
## 國小以下 39.5          7 123
##          44.5         16 150
##          49.5         32 200
##          54.5         51 250
##          59.5         76 286
##          64.5        154 350
##          69.5        390 321
## 國中     39.5          5  85
##          44.5         10 119
##          49.5         17 166
##          54.5         34 172
##          59.5         50 204
##          64.5         70 223
##          69.5         87 118
## 高中     39.5         23 236
##          44.5         37 368
##          49.5         42 500
##          54.5         65 497
##          59.5         90 458
##          64.5        132 366
##          69.5        195 288
## 專科     39.5         14 146
##          44.5         21 288
##          49.5         37 425
##          54.5         41 263
##          59.5         43 204
##          64.5         57 134
##          69.5         98  96
## 大學以上 39.5         15 478
##          44.5         21 276
##          49.5          8 131
##          54.5         10  51
##          59.5          4  20
##          64.5          9  21
##          69.5          5  21

ftable(dta, row.vars = c('性別', '年次'), col.vars = '結過婚')

##           結過婚   0   1
## 性別 年次               
## 女   39.5         43 576
##      44.5         49 597
##      49.5         62 708
##      54.5         93 603
##      59.5        123 605
##      64.5        187 573
##      69.5        337 517
## 男   39.5         21 492
##      44.5         56 604
##      49.5         74 714
##      54.5        108 630
##      59.5        140 567
##      64.5        235 521
##      69.5        438 327

#三個變項與結過婚與否的列聯表，也可以換成比率
ftable(dta, row.vars = c(1, 2, 5), col.vars = '結過婚')

##                    結過婚   0   1
## 教育     性別 年次               
## 國小以下 女   39.5          6  47
##               44.5         12  58
##               49.5         23  86
##               54.5         28 104
##               59.5         53 124
##               64.5         87 155
##               69.5        190 200
##          男   39.5          1  76
##               44.5          4  92
##               49.5          9 114
##               54.5         23 146
##               59.5         23 162
##               64.5         67 195
##               69.5        200 121
## 國中     女   39.5          5  32
##               44.5          5  38
##               49.5         10  68
##               54.5         21  71
##               59.5         29 107
##               64.5         38 135
##               69.5         47  73
##          男   39.5          0  53
##               44.5          5  81
##               49.5          7  98
##               54.5         13 101
##               59.5         21  97
##               64.5         32  88
##               69.5         40  45
## 高中     女   39.5         15 117
##               44.5         19 186
##               49.5         17 257
##               54.5         29 272
##               59.5         30 255
##               64.5         49 217
##               69.5         76 168
##          男   39.5          8 119
##               44.5         18 182
##               49.5         25 243
##               54.5         36 225
##               59.5         60 203
##               64.5         83 149
##               69.5        119 120
## 專科     女   39.5         11  78
##               44.5          7 124
##               49.5         10 200
##               54.5         13 128
##               59.5          9 105
##               64.5         12  54
##               69.5         23  57
##          男   39.5          3  68
##               44.5         14 164
##               49.5         27 225
##               54.5         28 135
##               59.5         34  99
##               64.5         45  80
##               69.5         75  39
## 大學以上 女   39.5          6 302
##               44.5          6 191
##               49.5          2  97
##               54.5          2  28
##               59.5          2  14
##               64.5          1  12
##               69.5          1  19
##          男   39.5          9 176
##               44.5         15  85
##               49.5          6  34
##               54.5          8  23
##               59.5          2   6
##               64.5          8   9
##               69.5          4   2

prop.table(ftable(dta, row.vars=c(1, 2, 5), col.vars = '結過婚'), 1)

##                    結過婚          0          1
## 教育     性別 年次                             
## 國小以下 女   39.5        0.11320755 0.88679245
##               44.5        0.17142857 0.82857143
##               49.5        0.21100917 0.78899083
##               54.5        0.21212121 0.78787879
##               59.5        0.29943503 0.70056497
##               64.5        0.35950413 0.64049587
##               69.5        0.48717949 0.51282051
##          男   39.5        0.01298701 0.98701299
##               44.5        0.04166667 0.95833333
##               49.5        0.07317073 0.92682927
##               54.5        0.13609467 0.86390533
##               59.5        0.12432432 0.87567568
##               64.5        0.25572519 0.74427481
##               69.5        0.62305296 0.37694704
## 國中     女   39.5        0.13513514 0.86486486
##               44.5        0.11627907 0.88372093
##               49.5        0.12820513 0.87179487
##               54.5        0.22826087 0.77173913
##               59.5        0.21323529 0.78676471
##               64.5        0.21965318 0.78034682
##               69.5        0.39166667 0.60833333
##          男   39.5        0.00000000 1.00000000
##               44.5        0.05813953 0.94186047
##               49.5        0.06666667 0.93333333
##               54.5        0.11403509 0.88596491
##               59.5        0.17796610 0.82203390
##               64.5        0.26666667 0.73333333
##               69.5        0.47058824 0.52941176
## 高中     女   39.5        0.11363636 0.88636364
##               44.5        0.09268293 0.90731707
##               49.5        0.06204380 0.93795620
##               54.5        0.09634551 0.90365449
##               59.5        0.10526316 0.89473684
##               64.5        0.18421053 0.81578947
##               69.5        0.31147541 0.68852459
##          男   39.5        0.06299213 0.93700787
##               44.5        0.09000000 0.91000000
##               49.5        0.09328358 0.90671642
##               54.5        0.13793103 0.86206897
##               59.5        0.22813688 0.77186312
##               64.5        0.35775862 0.64224138
##               69.5        0.49790795 0.50209205
## 專科     女   39.5        0.12359551 0.87640449
##               44.5        0.05343511 0.94656489
##               49.5        0.04761905 0.95238095
##               54.5        0.09219858 0.90780142
##               59.5        0.07894737 0.92105263
##               64.5        0.18181818 0.81818182
##               69.5        0.28750000 0.71250000
##          男   39.5        0.04225352 0.95774648
##               44.5        0.07865169 0.92134831
##               49.5        0.10714286 0.89285714
##               54.5        0.17177914 0.82822086
##               59.5        0.25563910 0.74436090
##               64.5        0.36000000 0.64000000
##               69.5        0.65789474 0.34210526
## 大學以上 女   39.5        0.01948052 0.98051948
##               44.5        0.03045685 0.96954315
##               49.5        0.02020202 0.97979798
##               54.5        0.06666667 0.93333333
##               59.5        0.12500000 0.87500000
##               64.5        0.07692308 0.92307692
##               69.5        0.05000000 0.95000000
##          男   39.5        0.04864865 0.95135135
##               44.5        0.15000000 0.85000000
##               49.5        0.15000000 0.85000000
##               54.5        0.25806452 0.74193548
##               59.5        0.25000000 0.75000000
##               64.5        0.47058824 0.52941176
##               69.5        0.66666667 0.33333333

#換種方式看
ftable(dta, row.vars = c(1, 2), col.vars = c('年次','結過婚'))

##               年次   39.5     44.5     49.5     54.5     59.5     64.5     69.5    
##               結過婚    0   1    0   1    0   1    0   1    0   1    0   1    0   1
## 教育     性別                                                                      
## 國小以下 女             6  47   12  58   23  86   28 104   53 124   87 155  190 200
##          男             1  76    4  92    9 114   23 146   23 162   67 195  200 121
## 國中     女             5  32    5  38   10  68   21  71   29 107   38 135   47  73
##          男             0  53    5  81    7  98   13 101   21  97   32  88   40  45
## 高中     女            15 117   19 186   17 257   29 272   30 255   49 217   76 168
##          男             8 119   18 182   25 243   36 225   60 203   83 149  119 120
## 專科     女            11  78    7 124   10 200   13 128    9 105   12  54   23  57
##          男             3  68   14 164   27 225   28 135   34  99   45  80   75  39
## 大學以上 女             6 302    6 191    2  97    2  28    2  14    1  12    1  19
##          男             9 176   15  85    6  34    8  23    2   6    8   9    4   2

ftable(xtabs(結過婚 ~ 性別 + 教育 + as.factor(年次), data = dta))

##               as.factor(年次) 39.5 44.5 49.5 54.5 59.5 64.5 69.5
## 性別 教育                                                       
## 女   國小以下                   47   58   86  104  124  155  200
##      國中                       32   38   68   71  107  135   73
##      高中                      117  186  257  272  255  217  168
##      專科                       78  124  200  128  105   54   57
##      大學以上                  302  191   97   28   14   12   19
## 男   國小以下                   76   92  114  146  162  195  121
##      國中                       53   81   98  101   97   88   45
##      高中                      119  182  243  225  203  149  120
##      專科                       68  164  225  135   99   80   39
##      大學以上                  176   85   34   23    6    9    2

4 ggplot

#準備要畫圖，載入ggplot2
require(ggplot2)

## Loading required package: ggplot2

#圖示不同年次、性別與教育程度的結過婚比率，加上信賴區間
ggplot(data = dta, aes(x = 年次, y = 結過婚, shape = 性別, color = 性別)) +
  stat_summary(fun.data = mean_se, size = 1) +
  facet_grid(. ~ 教育 ) +
  labs(x = '年次', y = '結過婚百分比') +
  theme(legend.position = c (.95, .1))

5 logistic regression

summary(m1 <- glm(結過婚 ~ 年次, data = dta, family = binomial) )

## 
## Call:
## glm(formula = 結過婚 ~ 年次, family = binomial, data = dta)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5499   0.2811   0.4498   0.7040   1.0540  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  7.04825    0.19472   36.20   <2e-16 ***
## 年次        -0.09713    0.00319  -30.45   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 9913.1  on 9999  degrees of freedom
## Residual deviance: 8766.2  on 9998  degrees of freedom
## AIC: 8770.2
## 
## Number of Fisher Scoring iterations: 5

#勝算比的信賴區間 
exp(cbind(coef=coef(m1), confint(m1)))

## Waiting for profiling to be done...

##                     coef       2.5 %       97.5 %
## (Intercept) 1150.8459885 788.6829642 1692.1285115
## 年次           0.9074337   0.9017331    0.9130795

#邏輯迴歸，加入性別與教育程度
summary(m_full <- glm(結過婚 ~ 年次 * 性別 * 教育 , data = dta, family = binomial))

## 
## Call:
## glm(formula = 結過婚 ~ 年次 * 性別 * 教育, family = binomial, 
##     data = dta)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0831   0.2328   0.4607   0.6584   1.3921  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               4.774415   0.512535   9.315  < 2e-16 ***
## 年次                     -0.066837   0.008136  -8.215  < 2e-16 ***
## 性別男                    6.481320   0.956018   6.779 1.21e-11 ***
## 教育國中                 -0.519822   0.893145  -0.582  0.56056    
## 教育高中                  0.564630   0.721014   0.783  0.43356    
## 教育專科                  0.629152   0.905912   0.694  0.48737    
## 教育大學以上              1.102391   1.299680   0.848  0.39633    
## 年次:性別男              -0.098016   0.014898  -6.579 4.73e-11 ***
## 年次:教育國中             0.015964   0.014465   1.104  0.26976    
## 年次:教育高中             0.006055   0.011738   0.516  0.60595    
## 年次:教育專科             0.007865   0.015370   0.512  0.60882    
## 年次:教育大學以上         0.015564   0.025722   0.605  0.54513    
## 性別男:教育國中          -2.045260   1.518446  -1.347  0.17800    
## 性別男:教育高中          -4.635321   1.187586  -3.903 9.50e-05 ***
## 性別男:教育專科          -3.743424   1.354443  -2.764  0.00571 ** 
## 性別男:教育大學以上      -5.534359   1.765063  -3.136  0.00172 ** 
## 年次:性別男:教育國中      0.027590   0.024293   1.136  0.25609    
## 年次:性別男:教育高中      0.056880   0.018937   3.004  0.00267 ** 
## 年次:性別男:教育專科      0.035498   0.022259   1.595  0.11076    
## 年次:性別男:教育大學以上  0.044030   0.033610   1.310  0.19018    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 9913.1  on 9999  degrees of freedom
## Residual deviance: 8489.7  on 9980  degrees of freedom
## AIC: 8529.7
## 
## Number of Fisher Scoring iterations: 6

#看看那個效果可刪除
drop1(m_full, test = 'Chisq')

## Single term deletions
## 
## Model:
## 結過婚 ~ 年次 * 性別 * 教育
##                Df Deviance    AIC    LRT Pr(>Chi)  
## <none>              8489.7 8529.7                  
## 年次:性別:教育  4   8499.1 8531.1 9.4573  0.05063 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(m_drop1 <- update(m_full, ~ . - 年次:性別:教育) )

## 
## Call:
## glm(formula = 結過婚 ~ 年次 + 性別 + 教育 + 年次:性別 + 年次:教育 + 
##     性別:教育, family = binomial, data = dta)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8957   0.2352   0.4540   0.6443   1.4225  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          5.478361   0.460628  11.893  < 2e-16 ***
## 年次                -0.078070   0.007260 -10.753  < 2e-16 ***
## 性別男               4.303726   0.462789   9.300  < 2e-16 ***
## 教育國中            -1.065792   0.719274  -1.482  0.13840    
## 教育高中            -0.818436   0.558725  -1.465  0.14297    
## 教育專科            -0.120577   0.643046  -0.188  0.85126    
## 教育大學以上         0.087694   0.847448   0.103  0.91758    
## 年次:性別男         -0.063821   0.007125  -8.958  < 2e-16 ***
## 年次:教育國中        0.024595   0.011556   2.128  0.03331 *  
## 年次:教育高中        0.028792   0.009027   3.190  0.00143 ** 
## 年次:教育專科        0.019908   0.010677   1.865  0.06223 .  
## 年次:教育大學以上    0.033298   0.016165   2.060  0.03941 *  
## 性別男:教育國中     -0.270610   0.172757  -1.566  0.11725    
## 性別男:教育高中     -1.104423   0.139475  -7.918 2.41e-15 ***
## 性別男:教育專科     -1.491088   0.178288  -8.363  < 2e-16 ***
## 性別男:教育大學以上 -2.882467   0.314888  -9.154  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 9913.1  on 9999  degrees of freedom
## Residual deviance: 8499.1  on 9984  degrees of freedom
## AIC: 8531.1
## 
## Number of Fisher Scoring iterations: 6

#邏輯迴歸模型診斷，將預測值與實際觀測值放在一起
ggplot(data = dta, aes(x = 年次, y = 結過婚, shape = 性別, color = 性別)) +
  stat_summary(fun.data = mean_se, size = 1) +
  stat_summary(aes(y = fitted(m_drop1)), fun.y = mean, geom = 'line', size = 1) +
  facet_grid(. ~ 教育) +
  scale_x_continuous(breaks = seq(30, 70, by = 5)) +
  labs(x= '年次', y = '結過婚百分比') +
  theme(legend.position= c (.9, .1))

## Warning: `fun.y` is deprecated. Use `fun` instead.

6 add dispersion term

#修改邏輯迴歸，加入分散參數。載入 dispmod 套件，預備處理此事
require(dispmod)

## Loading required package: dispmod

#將資料整理成 dispmod 要的形式
dta_f <- data.frame(ftable(dta, row.vars = c(1, 2, 5), col.vars = '結過婚'))
dta_f1 <- subset(dta_f , 結過婚 == 1)
dta_f1$Tot <- dta_f[1:70, 5] + dta_f[71:140, 5]
dta_f1 <- dta_f1[,-4]

#將年次設定為連續變項
dta_f1$年次 <- as.numeric(as.vector(dta_f1$年次))

#看看資料
head(dta_f1)

##        教育 性別 年次 Freq Tot
## 71 國小以下   女 39.5   47  53
## 72     國中   女 39.5   32  37
## 73     高中   女 39.5  117 132
## 74     專科   女 39.5   78  89
## 75 大學以上   女 39.5  302 308
## 76 國小以下   男 39.5   76  77

#以整理過資料重新分析，確認與前面相同
summary(m_last <- glm(cbind(Freq, Tot-Freq) ~ 年次 * 性別 * 教育 - 年次:性別:教育, 
                    data = dta_f1, family = binomial))

## 
## Call:
## glm(formula = cbind(Freq, Tot - Freq) ~ 年次 * 性別 * 教育 - 
##     年次:性別:教育, family = binomial, data = dta_f1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.7255  -0.8572   0.0203   0.5868   3.1982  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          5.478361   0.460628  11.893  < 2e-16 ***
## 年次                -0.078070   0.007260 -10.753  < 2e-16 ***
## 性別男               4.303726   0.462789   9.300  < 2e-16 ***
## 教育國中            -1.065792   0.719274  -1.482  0.13840    
## 教育高中            -0.818436   0.558725  -1.465  0.14297    
## 教育專科            -0.120577   0.643045  -0.188  0.85126    
## 教育大學以上         0.087694   0.847449   0.103  0.91758    
## 年次:性別男         -0.063821   0.007125  -8.958  < 2e-16 ***
## 年次:教育國中        0.024595   0.011556   2.128  0.03331 *  
## 年次:教育高中        0.028792   0.009027   3.190  0.00143 ** 
## 年次:教育專科        0.019908   0.010677   1.865  0.06223 .  
## 年次:教育大學以上    0.033298   0.016165   2.060  0.03941 *  
## 性別男:教育國中     -0.270610   0.172757  -1.566  0.11725    
## 性別男:教育高中     -1.104423   0.139475  -7.918 2.41e-15 ***
## 性別男:教育專科     -1.491088   0.178288  -8.363  < 2e-16 ***
## 性別男:教育大學以上 -2.882467   0.314888  -9.154  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1530.13  on 69  degrees of freedom
## Residual deviance:  116.19  on 54  degrees of freedom
## AIC: 446.31
## 
## Number of Fisher Scoring iterations: 4

#加入分散參數
summary(m_lastd <- glm.binomial.disp(m_last))

## 
## Binomial overdispersed logit model fitting...
## Iter.  1  phi: 0.009144343 
## Iter.  2  phi: 0.0068854 
## Iter.  3  phi: 0.007239847 
## Iter.  4  phi: 0.007179674 
## Iter.  5  phi: 0.007189756 
## Iter.  6  phi: 0.007188063 
## Iter.  7  phi: 0.007188347 
## Iter.  8  phi: 0.0071883 
## Iter.  9  phi: 0.007188308 
## Iter.  10  phi: 0.007188306 
## Iter.  11  phi: 0.007188306 
## Converged after 11 iterations. 
## Estimated dispersion parameter: 0.007188306 
## 
## Call:
## glm(formula = cbind(Freq, Tot - Freq) ~ 年次 * 性別 * 教育 - 
##     年次:性別:教育, family = binomial, data = dta_f1, weights = disp.weights)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.34206  -0.58236   0.01665   0.49197   1.91197  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          5.08101    0.63580   7.992 1.33e-15 ***
## 年次                -0.07189    0.01043  -6.895 5.38e-12 ***
## 性別男               4.41527    0.64693   6.825 8.80e-12 ***
## 教育國中            -0.71418    0.93255  -0.766   0.4438    
## 教育高中            -0.70145    0.81672  -0.859   0.3904    
## 教育專科             0.15978    0.89475   0.179   0.8583    
## 教育大學以上         0.24864    1.12922   0.220   0.8257    
## 年次:性別男         -0.06503    0.01015  -6.407 1.49e-10 ***
## 年次:教育國中        0.01878    0.01533   1.225   0.2204    
## 年次:教育高中        0.02688    0.01355   1.984   0.0473 *  
## 年次:教育專科        0.01487    0.01495   0.995   0.3200    
## 年次:教育大學以上    0.03072    0.02041   1.505   0.1322    
## 性別男:教育國中     -0.28931    0.24870  -1.163   0.2447    
## 性別男:教育高中     -1.13265    0.22766  -4.975 6.52e-07 ***
## 性別男:教育專科     -1.48291    0.25459  -5.825 5.72e-09 ***
## 性別男:教育大學以上 -2.87409    0.41432  -6.937 4.01e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 633.454  on 69  degrees of freedom
## Residual deviance:  52.675  on 54  degrees of freedom
## AIC: 236.78
## 
## Number of Fisher Scoring iterations: 4

## 
## Call:
## glm(formula = cbind(Freq, Tot - Freq) ~ 年次 * 性別 * 教育 - 
##     年次:性別:教育, family = binomial, data = dta_f1, weights = disp.weights)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.34206  -0.58236   0.01665   0.49197   1.91197  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          5.08101    0.63580   7.992 1.33e-15 ***
## 年次                -0.07189    0.01043  -6.895 5.38e-12 ***
## 性別男               4.41527    0.64693   6.825 8.80e-12 ***
## 教育國中            -0.71418    0.93255  -0.766   0.4438    
## 教育高中            -0.70145    0.81672  -0.859   0.3904    
## 教育專科             0.15978    0.89475   0.179   0.8583    
## 教育大學以上         0.24864    1.12922   0.220   0.8257    
## 年次:性別男         -0.06503    0.01015  -6.407 1.49e-10 ***
## 年次:教育國中        0.01878    0.01533   1.225   0.2204    
## 年次:教育高中        0.02688    0.01355   1.984   0.0473 *  
## 年次:教育專科        0.01487    0.01495   0.995   0.3200    
## 年次:教育大學以上    0.03072    0.02041   1.505   0.1322    
## 性別男:教育國中     -0.28931    0.24870  -1.163   0.2447    
## 性別男:教育高中     -1.13265    0.22766  -4.975 6.52e-07 ***
## 性別男:教育專科     -1.48291    0.25459  -5.825 5.72e-09 ***
## 性別男:教育大學以上 -2.87409    0.41432  -6.937 4.01e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 633.454  on 69  degrees of freedom
## Residual deviance:  52.675  on 54  degrees of freedom
## AIC: 236.78
## 
## Number of Fisher Scoring iterations: 4

#看看分散參數估計值
m_lastd$dispersion

## [1] 0.007188306

#看看模型是否改善
anova(m_last, m_lastd)

## Analysis of Deviance Table
## 
## Model 1: cbind(Freq, Tot - Freq) ~ 年次 * 性別 * 教育 - 年次:性別:教育
## Model 2: cbind(Freq, Tot - Freq) ~ 年次 * 性別 * 教育 - 年次:性別:教育
##   Resid. Df Resid. Dev Df Deviance
## 1        54    116.189            
## 2        54     52.675  0   63.515

#準備再一次模型診斷，收集資料，畫圖
dta_mlastd <- data.frame(dta_f1, phat = fitted(m_lastd))
ggplot(data = dta_mlastd, aes(x = 年次, y = Freq/Tot,  color = 性別, shape = 性別)) +
  geom_point(size = 3) +
  stat_smooth(aes(x = 年次, y = phat), method = 'loess', se = F, size = 1) +
  facet_grid(. ~ 教育) +
  scale_x_continuous(breaks = seq(30, 70, by = 5)) +
  labs(x= '年次', y = '結過婚百分比') +
  theme(legend.position = 'NONE' )

## `geom_smooth()` using formula 'y ~ x'

#準備繪圖。為了繪製方便，模型中去掉截距
summary(m_lastd0 <- update(m_lastd, . ~ . - 1))

## 
## Call:
## glm(formula = cbind(Freq, Tot - Freq) ~ 年次 + 性別 + 教育 + 
##     年次:性別 + 年次:教育 + 性別:教育 - 1, family = binomial, 
##     data = dta_f1, weights = disp.weights)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.34206  -0.58236   0.01665   0.49197   1.91197  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## 年次                -0.07189    0.01043  -6.895 5.38e-12 ***
## 性別女               5.08101    0.63580   7.992 1.33e-15 ***
## 性別男               9.49628    0.75372  12.599  < 2e-16 ***
## 教育國中            -0.71418    0.93255  -0.766   0.4438    
## 教育高中            -0.70145    0.81672  -0.859   0.3904    
## 教育專科             0.15978    0.89475   0.179   0.8583    
## 教育大學以上         0.24864    1.12922   0.220   0.8257    
## 年次:性別男         -0.06503    0.01015  -6.407 1.49e-10 ***
## 年次:教育國中        0.01878    0.01533   1.225   0.2204    
## 年次:教育高中        0.02688    0.01355   1.984   0.0473 *  
## 年次:教育專科        0.01487    0.01495   0.995   0.3200    
## 年次:教育大學以上    0.03072    0.02041   1.505   0.1322    
## 性別男:教育國中     -0.28931    0.24870  -1.163   0.2447    
## 性別男:教育高中     -1.13265    0.22766  -4.975 6.52e-07 ***
## 性別男:教育專科     -1.48291    0.25459  -5.825 5.72e-09 ***
## 性別男:教育大學以上 -2.87409    0.41432  -6.937 4.01e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2504.909  on 70  degrees of freedom
## Residual deviance:   52.675  on 54  degrees of freedom
## AIC: 236.78
## 
## Number of Fisher Scoring iterations: 4

require(coefplot)

## Loading required package: coefplot

#繪製估計值
coefplot(m_lastd0) + labs(x = '估計值', y = '變項', title = '')

7 多分類邏輯迴歸

#先看看年代與婚姻列聯表
with(dta, prop.table(table(年次, 婚姻), 1))

##       婚姻
## 年次          有偶        未婚        離婚        喪偶
##   39.5 0.719964664 0.056537102 0.113957597 0.109540636
##   44.5 0.728177642 0.080398162 0.129402757 0.062021440
##   49.5 0.713735558 0.087291399 0.155327343 0.043645700
##   54.5 0.716875872 0.140167364 0.120641562 0.022315202
##   59.5 0.671080139 0.183275261 0.135888502 0.009756098
##   64.5 0.622031662 0.278364116 0.093007916 0.006596306
##   69.5 0.455836936 0.478690550 0.058060531 0.007411983

#載入 mlogit 套件，準備執行多分類邏輯迴歸
require(mlogit)

## Loading required package: mlogit

## Loading required package: dfidx

## 
## Attaching package: 'dfidx'

## The following object is masked from 'package:stats':
## 
##     filter

#將資料轉成 mlogit 需要格式
dta_w <- mlogit.data(dta, shape = 'wide', choice = '婚姻')

#看看資料
head(dta_w)

## ~~~~~~~
##  first 10 observations out of 40000 
## ~~~~~~~
##        教育 性別  婚姻 年齡 年次 結過婚 chid  alt    idx
## 1  國小以下   女  TRUE 32.5 69.5      0    1 未婚 1:未婚
## 2  國小以下   女 FALSE 32.5 69.5      0    1 有偶 1:有偶
## 3  國小以下   女 FALSE 32.5 69.5      0    1 喪偶 1:喪偶
## 4  國小以下   女 FALSE 32.5 69.5      0    1 離婚 1:離婚
## 5      專科   男 FALSE 57.5 44.5      1    2 未婚 2:未婚
## 6      專科   男  TRUE 57.5 44.5      1    2 有偶 2:有偶
## 7      專科   男 FALSE 57.5 44.5      1    2 喪偶 2:喪偶
## 8      專科   男 FALSE 57.5 44.5      1    2 離婚 2:離婚
## 9      國中   男 FALSE 42.5 59.5      1    3 未婚 3:未婚
## 10     國中   男  TRUE 42.5 59.5      1    3 有偶 3:有偶
## 
## ~~~ indexes ~~~~
##    chid  alt
## 1     1 未婚
## 2     1 有偶
## 3     1 喪偶
## 4     1 離婚
## 5     2 未婚
## 6     2 有偶
## 7     2 喪偶
## 8     2 離婚
## 9     3 未婚
## 10    3 有偶
## indexes:  1, 2

#只展示年代效果
summary(ml0 <- mlogit(婚姻 ~ 0 | 年次, data = dta_w ) )

## 
## Call:
## mlogit(formula = 婚姻 ~ 0 | 年次, data = dta_w, method = "nr")
## 
## Frequencies of alternatives:choice
##   未婚   有偶   喪偶   離婚 
## 0.1966 0.6550 0.0341 0.1143 
## 
## nr method
## 7 iterations, 0h:0m:2s 
## g'(-H)^-1g = 0.000103 
## successive function values within tolerance limits 
## 
## Coefficients :
##                    Estimate Std. Error z-value  Pr(>|z|)    
## (Intercept):有偶  6.6554207  0.1975766  33.685 < 2.2e-16 ***
## (Intercept):喪偶  8.5654937  0.3956179  21.651 < 2.2e-16 ***
## (Intercept):離婚  5.2686719  0.2507264  21.014 < 2.2e-16 ***
## 年次:有偶        -0.0935790  0.0032415 -28.869 < 2.2e-16 ***
## 年次:喪偶        -0.1905932  0.0079392 -24.007 < 2.2e-16 ***
## 年次:離婚        -0.1002414  0.0043105 -23.255 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Log-Likelihood: -8918.3
## McFadden R^2:  0.07105 
## Likelihood ratio test : chisq = 1364.2 (p.value = < 2.22e-16)

#勝算比信賴區間
cbind(b_hat = exp(coef(ml0)), b_ci = exp(confint(ml0)))

##                         b_hat        2.5 %       97.5 %
## (Intercept):有偶  776.9847186  527.5152157 1.144432e+03
## (Intercept):喪偶 5247.4301390 2416.5519282 1.139455e+04
## (Intercept):離婚  194.1579345  118.7781080 3.173759e+02
## 年次:有偶           0.9106660    0.9048986 9.164702e-01
## 年次:喪偶           0.8264687    0.8137080 8.394295e-01
## 年次:離婚           0.9046190    0.8970085 9.122941e-01

#準備畫圖，先得到預測值
newdta <- with(dta, data.frame(
               年次 = rep(seq(from = 39.5, to = 69.5, length = 7), 4),
               婚姻 = factor(rep(c('有偶', '未婚', '離婚', '喪偶'), each = 7) ) ) )
 
newdta_w <- mlogit.data(newdta, shape = 'wide', choice = '婚姻')
ml0_phat <- as.data.frame(predict(ml0, newdta_w)[1:7, ])

#需轉換資料形式，載入 reshape 套件
require(reshape2)

## Loading required package: reshape2

#製造年次婚姻列聯表、將預測值轉成需要格式、彙整在一起
obs_p <- data.frame(with(dta, prop.table(table(年次, 婚姻), 1)))
dta_op <- data.frame(obs_p, melt(ml0_phat))

## No id variables; using all as measure variables

#繪圖
ggplot(data = dta_op, aes(x = 年次, y = Freq, shape = 婚姻)) +
 geom_point(size = 3) + 
 stat_smooth(aes(x = 年次, y = value, group = 婚姻), method = 'loess', se = F, size = 1) +
 labs(x = '年次', y = '分類百分比') +
 theme(legend.position = c(.92, .88))

## `geom_smooth()` using formula 'y ~ x'

1026hw1

1 input data

2 data management

3 table

4 ggplot

5 logistic regression

6 add dispersion term

7 多分類邏輯迴歸