Data

#
# Load the survey data. stringsAsFactors = TRUE is spelled out because the
# relevel() calls below only accept factors, and read.csv() no longer converts
# strings to factors by default since R 4.0.0.
dta <- read.csv("data0419.csv", header = TRUE, stringsAsFactors = TRUE)
options(digits = 3)
pacman::p_load(tidyverse, ggplot2)

# Set the reference level of each categorical predictor, coerce the numeric
# measures, derive JobZone_D and Core, and keep rows with Age >= 20.
dta <- dta %>%
  mutate(
    Gender = relevel(Gender, ref = "女"),
    Sector = relevel(Sector, ref = "私立"),
    Field1 = relevel(Field1, ref = "遊憩與運動學群"),
    # Fix the level order first, then move "技術學院" to the front.
    EduLv = factor(
      EduLv,
      levels = c(
        "博士", "碩士", "普通大學", "科技大學",
        "技術學院", "五專", "三專",
        "二專", "高中", "高職", "國中"
      )
    ),
    EduLv = relevel(EduLv, ref = "技術學院"),
    # The first listed level ("宜花東離島") becomes the reference level.
    Region = factor(
      Region,
      levels = c(
        "宜花東離島", "北北基", "桃竹苗",
        "中彰投", "雲嘉南", "高屏澎"
      )
    ),
    Age = as.numeric(Age),
    J_year = as.numeric(J_year),
    JobZone = as.numeric(JobZone),
    EduZone = as.numeric(EduZone),
    # Difference EduZone - JobZone, computed from the numeric versions above.
    JobZone_D = as.numeric(EduZone - JobZone),
    Salary = as.numeric(Salary),
    # Labelled factor built from the integer codes in JobCor.
    Core = recode_factor(
      as.factor(JobCor),
      "1" = "無關聯",
      "2" = "部分關聯",
      "3" = "核心關聯"
    ),
    # Explicit level order; "符合工作要求" is listed first and is therefore
    # the reference level, so no separate relevel() call is needed.
    SubEduOver = factor(
      SubEduOver,
      levels = c("符合工作要求", "高於工作要求", "低於工作要求")
    )
  ) %>%
  filter(Age >= 20)
# Print a compact overview of the prepared data: one line per column with its
# type and first values.
dta %>% glimpse()
## Observations: 1,568
## Variables: 25
## $ SID         <fctr> A10, A100, A103, A104, A105, A106, A107, A108, A1...
## $ Gender      <fctr> 女, 女, 男, 女, 女, 女, 女, 女, 女, 男, 男, 男, 女, 男, 男, 女, 女...
## $ Sector      <fctr> 國立(公立), 私立, 國立(公立), 國立(公立), 國立(公立), 私立, 私立, 國立(公立...
## $ EduLv       <fctr> 碩士, 普通大學, 高職, 普通大學, 普通大學, 普通大學, 普通大學, 普通大學, 碩士, 碩...
## $ SubEduOver  <fctr> 高於工作要求, 符合工作要求, 符合工作要求, 符合工作要求, 符合工作要求, 符合工作要求, 符...
## $ Require     <fctr> 高中/高職, 普通大學, 高中/高職, 普通大學, 普通大學, 普通大學, 普通大學, 普通大學,...
## $ Field1      <fctr> 資訊學群, 外語學群, 工程學群, 文史哲學群, 文史哲學群, 大眾傳播學群, 大眾傳播學群, 藝...
## $ City        <fctr> 高雄市, 苗栗縣, 高雄市, 南投縣, 嘉義市, 臺北市, 臺北市, 南投縣, 高雄市, 臺中市,...
## $ Category    <fctr> 受雇於公營機關, 受雇於公營機關, 受雇者於私營企業, 受雇於公營機關, 受雇者於私營企業, 受雇...
## $ Staff       <fctr> 50-99人, 50-99人, 2-9人, 100-199人, 10-29人, 30-49人, 3...
## $ Hours       <int> 40, 70, 57, 51, 64, 50, 50, 47, 50, 60, 45, 40, 56...
## $ J_year      <dbl> 8, 4, 21, 1, 6, 0, 1, 1, 17, 7, 3, 23, 1, 2, 1, 1,...
## $ J_total     <dbl> 8, 4, 30, 1, 6, 0, 2, 2, 28, 7, 30, 26, 1, 10, 1, ...
## $ income      <fctr> 2-3萬以下, 3-4萬以下, 3-4萬以下, 4-5萬以下, 2萬以下, 3-4萬以下, 3-4...
## $ SubMismatch <int> 2, 3, 5, 4, 5, 4, 3, 3, 4, 4, 4, 4, 5, 5, 3, 2, 4,...
## $ JobSat      <int> 4, 3, 5, 6, 7, 5, 3, 6, 3, 5, 4, 7, 3, 4, 4, 4, 5,...
## $ EduZone     <dbl> 5, 4, 2, 4, 4, 4, 4, 4, 5, 5, 3, 5, 4, 4, 4, 4, 5,...
## $ Region      <fctr> 高屏澎, 桃竹苗, 高屏澎, 中彰投, 雲嘉南, 北北基, 北北基, 中彰投, 高屏澎, 中彰投,...
## $ Salary      <dbl> 25000, 35000, 35000, 45000, 20000, 35000, 35000, 4...
## $ Age         <dbl> 34, 30, 62, 25, 21, 24, 25, 26, 57, 35, 54, 54, 23...
## $ JobZone     <dbl> 3, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4, 3, 2, 4,...
## $ JobCor      <int> 1, 2, 1, 2, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 3, 1, 2,...
## $ Core        <fctr> 無關聯, 部分關聯, 無關聯, 部分關聯, 無關聯, 核心關聯, 無關聯, 無關聯, 無關聯, 核...
## $ ObjOver     <fctr> over, adequate, under, adequate, adequate, adequa...
## $ JobZone_D   <dbl> 2, 0, -1, 0, 0, 0, 0, 0, 1, 1, -1, 0, 0, 0, 1, 2, ...
# Count missing values per column. colSums(is.na()) works directly on the data
# frame; the nested apply() form coerces it to a character matrix first and
# fails when the frame has only one column.
colSums(is.na(dta))
##         SID      Gender      Sector       EduLv  SubEduOver     Require 
##           0           0           0           0           0           0 
##      Field1        City    Category       Staff       Hours      J_year 
##           0           0           0           0           0           0 
##     J_total      income SubMismatch      JobSat     EduZone      Region 
##           0           0           0           0           0           0 
##      Salary         Age     JobZone      JobCor        Core     ObjOver 
##           0           0           0           0           0           0 
##   JobZone_D 
##           0
# List the level order of the categorical columns. With R's default treatment
# contrasts the first level of each factor is the reference category in lm().
dta %>%
  dplyr::select(Sector, Field1, City, Region, EduLv, SubEduOver, ObjOver) %>%
  purrr::map(levels)
## $Sector
## [1] "私立"         "國外學校"     "國立(公立)"
## 
## $Field1
##  [1] "遊憩與運動學群" "大眾傳播學群"   "工程學群"       "文史哲學群"    
##  [5] "外語學群"       "生命科學學群"   "生物資源學群"   "地球與環境學群"
##  [9] "法政學群"       "社會與心理學群" "建築與設計學群" "財經學群"      
## [13] "教育學群"       "資訊學群"       "管理學群"       "數理化學群"    
## [17] "醫藥衛生學群"   "藝術學群"      
## 
## $City
##  [1] "宜蘭縣" "花蓮縣" "金門縣" "南投縣" "屏東縣" "苗栗縣" "桃園市"
##  [8] "高雄市" "基隆市" "雲林縣" "新北市" "新竹市" "新竹縣" "嘉義市"
## [15] "嘉義縣" "彰化縣" "臺中市" "臺北市" "臺東縣" "臺南市" "澎湖縣"
## 
## $Region
## [1] "宜花東離島" "北北基"     "桃竹苗"     "中彰投"     "雲嘉南"    
## [6] "高屏澎"    
## 
## $EduLv
##  [1] "技術學院" "博士"     "碩士"     "普通大學" "科技大學" "五專"    
##  [7] "三專"     "二專"     "高中"     "高職"     "國中"    
## 
## $SubEduOver
## [1] "符合工作要求" "高於工作要求" "低於工作要求"
## 
## $ObjOver
## [1] "adequate" "over"     "under"
# Show the column names to choose which variables to drop.
colnames(dta)
##  [1] "SID"         "Gender"      "Sector"      "EduLv"       "SubEduOver" 
##  [6] "Require"     "Field1"      "City"        "Category"    "Staff"      
## [11] "Hours"       "J_year"      "J_total"     "income"      "SubMismatch"
## [16] "JobSat"      "EduZone"     "Region"      "Salary"      "Age"        
## [21] "JobZone"     "JobCor"      "Core"        "ObjOver"     "JobZone_D"
# Data set used by the models below: every column of dta except City, income
# and JobSat.
p <- dta %>%
  dplyr::select(-c(City, income, JobSat))

Model

基本模型

對照組設定:
女、私立、遊憩與運動學群、技術學院、宜花東離島、過量教育(符合工作要求)、客評關聯(無關聯)

分別看年齡與工作年資的效果

#
# Baseline log-salary model using Age.
lm1 <- lm(
  log(Salary) ~ Gender + Age + Sector + Region + Hours,
  data = p
)
summary(lm1)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + Age + Sector + Region + Hours, 
##     data = p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0732 -0.2324 -0.0141  0.2017  1.7082 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         9.581178   0.078394  122.22  < 2e-16 ***
## Gender男            0.180206   0.018919    9.52  < 2e-16 ***
## Age                 0.022415   0.001277   17.55  < 2e-16 ***
## Sector國外學校      0.321423   0.071735    4.48  8.0e-06 ***
## Sector國立(公立)  0.168267   0.018853    8.93  < 2e-16 ***
## Region北北基        0.047873   0.051154    0.94   0.3495    
## Region桃竹苗        0.150990   0.054430    2.77   0.0056 ** 
## Region中彰投       -0.057720   0.053995   -1.07   0.2852    
## Region雲嘉南       -0.061314   0.053261   -1.15   0.2498    
## Region高屏澎       -0.095102   0.053134   -1.79   0.0737 .  
## Hours               0.004167   0.000953    4.37  1.3e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.362 on 1557 degrees of freedom
## Multiple R-squared:  0.295,  Adjusted R-squared:  0.291 
## F-statistic: 65.2 on 10 and 1557 DF,  p-value: <2e-16
#
# Same baseline model with J_year in place of Age.
lm11 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours,
  data = p
)
summary(lm11)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours, data = p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9217 -0.2546 -0.0138  0.2134  1.8340 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        10.137618   0.070260  144.29  < 2e-16 ***
## Gender男            0.194683   0.019515    9.98  < 2e-16 ***
## J_year              0.023136   0.001684   13.74  < 2e-16 ***
## Sector國外學校      0.368650   0.074000    4.98  7.0e-07 ***
## Sector國立(公立)  0.176812   0.019500    9.07  < 2e-16 ***
## Region北北基        0.060635   0.052915    1.15   0.2520    
## Region桃竹苗        0.171719   0.056311    3.05   0.0023 ** 
## Region中彰投       -0.045241   0.055835   -0.81   0.4179    
## Region雲嘉南       -0.056319   0.055048   -1.02   0.3064    
## Region高屏澎       -0.073569   0.054904   -1.34   0.1805    
## Hours               0.004150   0.000985    4.21  2.7e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.374 on 1557 degrees of freedom
## Multiple R-squared:  0.247,  Adjusted R-squared:  0.242 
## F-statistic: 51.1 on 10 and 1557 DF,  p-value: <2e-16

加入學歷/學群

#
# J_year baseline plus education level (EduLv).
lm21 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + EduLv,
  data = p
)
summary(lm21)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + EduLv, data = p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9267 -0.2229 -0.0171  0.1873  1.8520 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        10.10554    0.08561  118.04  < 2e-16 ***
## Gender男            0.18835    0.01853   10.17  < 2e-16 ***
## J_year              0.02384    0.00169   14.14  < 2e-16 ***
## Sector國外學校      0.14817    0.07201    2.06  0.03979 *  
## Sector國立(公立)  0.07792    0.01999    3.90  0.00010 ***
## Region北北基        0.04135    0.04988    0.83  0.40727    
## Region桃竹苗        0.11183    0.05328    2.10  0.03600 *  
## Region中彰投       -0.05160    0.05262   -0.98  0.32700    
## Region雲嘉南       -0.08297    0.05200   -1.60  0.11079    
## Region高屏澎       -0.07421    0.05180   -1.43  0.15217    
## Hours               0.00361    0.00093    3.88  0.00011 ***
## EduLv博士           0.54772    0.12763    4.29  1.9e-05 ***
## EduLv碩士           0.32912    0.05962    5.52  4.0e-08 ***
## EduLv普通大學       0.09173    0.05786    1.59  0.11307    
## EduLv科技大學      -0.00558    0.06011   -0.09  0.92607    
## EduLv五專           0.13609    0.08494    1.60  0.10931    
## EduLv三專          -0.13029    0.35805   -0.36  0.71599    
## EduLv二專           0.03531    0.08222    0.43  0.66769    
## EduLv高中           0.18480    0.25631    0.72  0.47103    
## EduLv高職          -0.21464    0.08079   -2.66  0.00797 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.353 on 1548 degrees of freedom
## Multiple R-squared:  0.336,  Adjusted R-squared:  0.328 
## F-statistic: 41.2 on 19 and 1548 DF,  p-value: <2e-16
#
# J_year baseline plus field of study (Field1).
lm22 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + Field1,
  data = p
)
summary(lm22)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + Field1, data = p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9261 -0.2478 -0.0178  0.2127  2.0052 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          10.057216   0.093736  107.29  < 2e-16 ***
## Gender男              0.149272   0.021941    6.80  1.5e-11 ***
## J_year                0.022387   0.001664   13.45  < 2e-16 ***
## Sector國外學校        0.379724   0.072981    5.20  2.2e-07 ***
## Sector國立(公立)    0.185438   0.020664    8.97  < 2e-16 ***
## Region北北基          0.081320   0.052310    1.55  0.12026    
## Region桃竹苗          0.166556   0.055840    2.98  0.00290 ** 
## Region中彰投         -0.021551   0.055114   -0.39  0.69582    
## Region雲嘉南         -0.049608   0.054263   -0.91  0.36075    
## Region高屏澎         -0.072164   0.054159   -1.33  0.18291    
## Hours                 0.003476   0.000974    3.57  0.00037 ***
## Field1大眾傳播學群    0.027006   0.083860    0.32  0.74746    
## Field1工程學群        0.239929   0.068770    3.49  0.00050 ***
## Field1文史哲學群      0.015746   0.075661    0.21  0.83517    
## Field1外語學群        0.099669   0.075549    1.32  0.18727    
## Field1生命科學學群    0.106513   0.089361    1.19  0.23347    
## Field1生物資源學群    0.056530   0.095606    0.59  0.55442    
## Field1地球與環境學群  0.062680   0.098800    0.63  0.52590    
## Field1法政學群        0.097547   0.079010    1.23  0.21716    
## Field1社會與心理學群  0.092296   0.071828    1.28  0.19900    
## Field1建築與設計學群 -0.029695   0.079388   -0.37  0.70842    
## Field1財經學群        0.095413   0.072192    1.32  0.18648    
## Field1教育學群        0.123934   0.074014    1.67  0.09424 .  
## Field1資訊學群        0.146845   0.072048    2.04  0.04171 *  
## Field1管理學群        0.033969   0.070390    0.48  0.62946    
## Field1數理化學群      0.177664   0.079138    2.25  0.02491 *  
## Field1醫藥衛生學群    0.278198   0.072985    3.81  0.00014 ***
## Field1藝術學群       -0.022692   0.086327   -0.26  0.79270    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.367 on 1540 degrees of freedom
## Multiple R-squared:  0.286,  Adjusted R-squared:  0.273 
## F-statistic: 22.8 on 27 and 1540 DF,  p-value: <2e-16

過量

客觀只有限縮教育(under)的部分顯著,過量(over)的部分則不顯著 ><
自評的效果倒是很好

# Objective measure: J_year baseline plus ObjOver.
lm31 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + ObjOver,
  data = p
)
summary(lm31)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + ObjOver, data = p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9224 -0.2506 -0.0126  0.2168  1.8321 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        10.121816   0.071124  142.31  < 2e-16 ***
## Gender男            0.192688   0.019500    9.88  < 2e-16 ***
## J_year              0.022876   0.001691   13.53  < 2e-16 ***
## Sector國外學校      0.373561   0.073977    5.05  4.9e-07 ***
## Sector國立(公立)  0.182506   0.019671    9.28  < 2e-16 ***
## Region北北基        0.058555   0.052840    1.11    0.268    
## Region桃竹苗        0.167419   0.056314    2.97    0.003 ** 
## Region中彰投       -0.050737   0.055808   -0.91    0.363    
## Region雲嘉南       -0.063710   0.055037   -1.16    0.247    
## Region高屏澎       -0.079604   0.054868   -1.45    0.147    
## Hours               0.004058   0.000985    4.12  4.0e-05 ***
## ObjOverover         0.026216   0.020879    1.26    0.209    
## ObjOverunder        0.103931   0.040371    2.57    0.010 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.374 on 1555 degrees of freedom
## Multiple R-squared:  0.25,   Adjusted R-squared:  0.245 
## F-statistic: 43.3 on 12 and 1555 DF,  p-value: <2e-16
# Self-rated measure: J_year baseline plus SubEduOver.
lm32 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + SubEduOver,
  data = p
)
summary(lm32)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + SubEduOver, data = p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9488 -0.2389 -0.0265  0.2032  1.7671 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            10.186977   0.069174  147.27  < 2e-16 ***
## Gender男                0.206814   0.019246   10.75  < 2e-16 ***
## J_year                  0.022222   0.001659   13.40  < 2e-16 ***
## Sector國外學校          0.400140   0.072814    5.50  4.5e-08 ***
## Sector國立(公立)      0.171572   0.019307    8.89  < 2e-16 ***
## Region北北基            0.043488   0.051931    0.84   0.4025    
## Region桃竹苗            0.156990   0.055253    2.84   0.0046 ** 
## Region中彰投           -0.057601   0.054772   -1.05   0.2931    
## Region雲嘉南           -0.068673   0.054003   -1.27   0.2037    
## Region高屏澎           -0.080964   0.053854   -1.50   0.1329    
## Hours                   0.004493   0.000967    4.64  3.7e-06 ***
## SubEduOver高於工作要求 -0.154841   0.023760   -6.52  9.7e-11 ***
## SubEduOver低於工作要求 -0.174413   0.030482   -5.72  1.3e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.367 on 1555 degrees of freedom
## Multiple R-squared:  0.277,  Adjusted R-squared:  0.271 
## F-statistic: 49.7 on 12 and 1555 DF,  p-value: <2e-16

關聯

無論是客觀的關聯還是自評的關聯程度效果都很不錯

## Education-job relatedness
# Objective measure: J_year baseline plus Core.
lm41 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + Core,
  data = p
)
summary(lm41)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + Core, data = p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9703 -0.2486 -0.0198  0.2075  1.8821 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        10.085836   0.069678  144.75  < 2e-16 ***
## Gender男            0.201106   0.019264   10.44  < 2e-16 ***
## J_year              0.023695   0.001664   14.24  < 2e-16 ***
## Sector國外學校      0.382650   0.073025    5.24  1.8e-07 ***
## Sector國立(公立)  0.174247   0.019282    9.04  < 2e-16 ***
## Region北北基        0.063799   0.052168    1.22   0.2215    
## Region桃竹苗        0.177935   0.055523    3.20   0.0014 ** 
## Region中彰投       -0.034378   0.055067   -0.62   0.5325    
## Region雲嘉南       -0.051353   0.054276   -0.95   0.3442    
## Region高屏澎       -0.069485   0.054128   -1.28   0.1994    
## Hours               0.003985   0.000973    4.10  4.4e-05 ***
## Core部分關聯        0.131083   0.028462    4.61  4.4e-06 ***
## Core核心關聯        0.133151   0.022243    5.99  2.7e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.369 on 1555 degrees of freedom
## Multiple R-squared:  0.269,  Adjusted R-squared:  0.264 
## F-statistic: 47.8 on 12 and 1555 DF,  p-value: <2e-16
# Self-rated measure: J_year baseline plus SubMismatch (entered as numeric).
lm42 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + SubMismatch,
  data = p
)
summary(lm42)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + SubMismatch, data = p)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0169 -0.2414 -0.0095  0.1995  1.6998 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         9.931681   0.071733  138.45  < 2e-16 ***
## Gender男            0.206495   0.019025   10.85  < 2e-16 ***
## J_year              0.022649   0.001639   13.82  < 2e-16 ***
## Sector國外學校      0.365808   0.071985    5.08  4.2e-07 ***
## Sector國立(公立)  0.163094   0.019024    8.57  < 2e-16 ***
## Region北北基        0.057717   0.051475    1.12   0.2623    
## Region桃竹苗        0.170214   0.054777    3.11   0.0019 ** 
## Region中彰投       -0.042604   0.054315   -0.78   0.4329    
## Region雲嘉南       -0.058652   0.053549   -1.10   0.2736    
## Region高屏澎       -0.068784   0.053411   -1.29   0.1980    
## Hours               0.003976   0.000959    4.15  3.5e-05 ***
## SubMismatch         0.070426   0.007448    9.46  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.364 on 1556 degrees of freedom
## Multiple R-squared:  0.288,  Adjusted R-squared:  0.283 
## F-statistic: 57.2 on 11 and 1556 DF,  p-value: <2e-16

Try - 資料留大專院校

客觀過量教育一樣只有限縮的部份有顯著,客觀關聯效果高顯著
自評過量、關聯效果一樣不錯><

# 
# Subset of p restricted to the eight listed EduLv levels (junior college
# through doctorate).
p1 <- subset(
  p,
  EduLv %in% c(
    "博士", "碩士", "普通大學", "科技大學",
    "技術學院", "五專", "三專", "二專"
  )
)
#
# ObjOver model refitted on the p1 subset.
lmn1 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + ObjOver,
  data = p1
)
summary(lmn1)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + ObjOver, data = p1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9261 -0.2488 -0.0159  0.2117  1.8417 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        10.144147   0.071848  141.19  < 2e-16 ***
## Gender男            0.196379   0.019591   10.02  < 2e-16 ***
## J_year              0.024592   0.001756   14.01  < 2e-16 ***
## Sector國外學校      0.364167   0.073345    4.97  7.6e-07 ***
## Sector國立(公立)  0.185631   0.019808    9.37  < 2e-16 ***
## Region北北基        0.050767   0.053284    0.95  0.34086    
## Region桃竹苗        0.171319   0.056801    3.02  0.00260 ** 
## Region中彰投       -0.053554   0.056257   -0.95  0.34127    
## Region雲嘉南       -0.062325   0.055650   -1.12  0.26291    
## Region高屏澎       -0.077265   0.055407   -1.39  0.16337    
## Hours               0.003727   0.000991    3.76  0.00018 ***
## ObjOverover         0.011764   0.020944    0.56  0.57442    
## ObjOverunder        0.136029   0.043430    3.13  0.00177 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.37 on 1515 degrees of freedom
## Multiple R-squared:  0.26,   Adjusted R-squared:  0.254 
## F-statistic: 44.4 on 12 and 1515 DF,  p-value: <2e-16
#
# Core model refitted on the p1 subset.
lmn2 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + Core,
  data = p1
)
summary(lmn2)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + Core, data = p1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9643 -0.2467 -0.0187  0.2050  1.8851 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        10.102793   0.070633  143.03  < 2e-16 ***
## Gender男            0.203289   0.019391   10.48  < 2e-16 ***
## J_year              0.025279   0.001736   14.57  < 2e-16 ***
## Sector國外學校      0.370316   0.072577    5.10  3.8e-07 ***
## Sector國立(公立)  0.173668   0.019426    8.94  < 2e-16 ***
## Region北北基        0.058845   0.052706    1.12   0.2644    
## Region桃竹苗        0.182187   0.056096    3.25   0.0012 ** 
## Region中彰投       -0.034615   0.055600   -0.62   0.5337    
## Region雲嘉南       -0.047759   0.054967   -0.87   0.3851    
## Region高屏澎       -0.064764   0.054760   -1.18   0.2371    
## Hours               0.003657   0.000981    3.73   0.0002 ***
## Core部分關聯        0.127871   0.028568    4.48  8.2e-06 ***
## Core核心關聯        0.126141   0.022216    5.68  1.6e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.366 on 1515 degrees of freedom
## Multiple R-squared:  0.276,  Adjusted R-squared:  0.27 
## F-statistic: 48.1 on 12 and 1515 DF,  p-value: <2e-16
#
# Both self-rated measures (SubEduOver and SubMismatch) on the p1 subset.
lmn3 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours +
    SubEduOver + SubMismatch,
  data = p1
)
summary(lmn3)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + SubEduOver + SubMismatch, data = p1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0553 -0.2376 -0.0135  0.1915  1.6806 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            10.034604   0.073523  136.48  < 2e-16 ***
## Gender男                0.216976   0.019025   11.40  < 2e-16 ***
## J_year                  0.023532   0.001699   13.85  < 2e-16 ***
## Sector國外學校          0.377727   0.071012    5.32  1.2e-07 ***
## Sector國立(公立)      0.159363   0.019150    8.32  < 2e-16 ***
## Region北北基            0.037633   0.051500    0.73   0.4651    
## Region桃竹苗            0.161382   0.054782    2.95   0.0033 ** 
## Region中彰投           -0.053385   0.054275   -0.98   0.3255    
## Region雲嘉南           -0.066893   0.053672   -1.25   0.2128    
## Region高屏澎           -0.072617   0.053477   -1.36   0.1747    
## Hours                   0.004038   0.000958    4.22  2.6e-05 ***
## SubEduOver高於工作要求 -0.116271   0.024228   -4.80  1.8e-06 ***
## SubEduOver低於工作要求 -0.151612   0.031061   -4.88  1.2e-06 ***
## SubMismatch             0.053431   0.007851    6.81  1.4e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.358 on 1514 degrees of freedom
## Multiple R-squared:  0.31,   Adjusted R-squared:  0.305 
## F-statistic: 52.4 on 13 and 1514 DF,  p-value: <2e-16

Try - 資料留大學院校

客觀過量教育一樣只有限縮的部份有顯著,客觀關聯效果蠻好的
自評過量、關聯效果一樣不錯……

# 
# Subset of p restricted to the five listed EduLv levels; the junior-college
# levels kept in p1 are excluded here.
p2 <- subset(
  p,
  EduLv %in% c("博士", "碩士", "普通大學", "科技大學", "技術學院")
)
#
# ObjOver model refitted on the p2 subset. Reuses the name lmn1, so the fit on
# p1 is overwritten.
lmn1 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + ObjOver,
  data = p2
)
summary(lmn1)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + ObjOver, data = p2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9279 -0.2454 -0.0158  0.2084  1.8240 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        10.13273    0.07198  140.77  < 2e-16 ***
## Gender男            0.19143    0.01972    9.71  < 2e-16 ***
## J_year              0.02595    0.00192   13.49  < 2e-16 ***
## Sector國外學校      0.36819    0.07367    5.00  6.5e-07 ***
## Sector國立(公立)  0.18930    0.01998    9.48  < 2e-16 ***
## Region北北基        0.05165    0.05349    0.97  0.33442    
## Region桃竹苗        0.16567    0.05690    2.91  0.00365 ** 
## Region中彰投       -0.03127    0.05652   -0.55  0.58011    
## Region雲嘉南       -0.05496    0.05582   -0.98  0.32494    
## Region高屏澎       -0.07999    0.05574   -1.44  0.15150    
## Hours               0.00371    0.00100    3.70  0.00022 ***
## ObjOverover         0.01956    0.02103    0.93  0.35242    
## ObjOverunder        0.14981    0.04963    3.02  0.00258 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.365 on 1448 degrees of freedom
## Multiple R-squared:  0.256,  Adjusted R-squared:  0.25 
## F-statistic: 41.5 on 12 and 1448 DF,  p-value: <2e-16
#
# Core model refitted on the p2 subset. Reuses the name lmn2, so the fit on
# p1 is overwritten.
lmn2 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + Core,
  data = p2
)
summary(lmn2)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + Core, data = p2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9554 -0.2388 -0.0167  0.2015  1.8713 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        10.097824   0.070721  142.78  < 2e-16 ***
## Gender男            0.196441   0.019510   10.07  < 2e-16 ***
## J_year              0.026391   0.001905   13.85  < 2e-16 ***
## Sector國外學校      0.377379   0.072916    5.18  2.6e-07 ***
## Sector國立(公立)  0.180053   0.019632    9.17  < 2e-16 ***
## Region北北基        0.059829   0.052899    1.13  0.25824    
## Region桃竹苗        0.175846   0.056170    3.13  0.00178 ** 
## Region中彰投       -0.013663   0.055843   -0.24  0.80675    
## Region雲嘉南       -0.042197   0.055116   -0.77  0.44403    
## Region高屏澎       -0.065873   0.055072   -1.20  0.23184    
## Hours               0.003617   0.000992    3.65  0.00027 ***
## Core部分關聯        0.124463   0.028762    4.33  1.6e-05 ***
## Core核心關聯        0.125587   0.022369    5.61  2.4e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.361 on 1448 degrees of freedom
## Multiple R-squared:  0.272,  Adjusted R-squared:  0.266 
## F-statistic: 45.1 on 12 and 1448 DF,  p-value: <2e-16
#
# Both self-rated measures (SubEduOver and SubMismatch) on the p2 subset.
# Reuses the name lmn3, so the fit on p1 is overwritten.
lmn3 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours +
    SubEduOver + SubMismatch,
  data = p2
)
summary(lmn3)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + SubEduOver + SubMismatch, data = p2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.061 -0.235 -0.009  0.193  1.674 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            10.03743    0.07398  135.68  < 2e-16 ***
## Gender男                0.20989    0.01920   10.93  < 2e-16 ***
## J_year                  0.02445    0.00187   13.09  < 2e-16 ***
## Sector國外學校          0.38898    0.07153    5.44  6.3e-08 ***
## Sector國立(公立)      0.16748    0.01939    8.64  < 2e-16 ***
## Region北北基            0.03982    0.05181    0.77   0.4423    
## Region桃竹苗            0.15738    0.05498    2.86   0.0043 ** 
## Region中彰投           -0.03097    0.05464   -0.57   0.5710    
## Region雲嘉南           -0.05963    0.05393   -1.11   0.2691    
## Region高屏澎           -0.07396    0.05390   -1.37   0.1702    
## Hours                   0.00399    0.00097    4.11  4.1e-05 ***
## SubEduOver高於工作要求 -0.11512    0.02435   -4.73  2.5e-06 ***
## SubEduOver低於工作要求 -0.14991    0.03188   -4.70  2.8e-06 ***
## SubMismatch             0.04998    0.00799    6.26  5.2e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.353 on 1447 degrees of freedom
## Multiple R-squared:  0.304,  Adjusted R-squared:  0.298 
## F-statistic: 48.6 on 13 and 1447 DF,  p-value: <2e-16

Try - 資料留大學

大學生們在客評過量、關聯的效果非常符合研究期待
在自評部份也表現得不錯~~

# Subset of p restricted to EduLv "普通大學". %in% is used instead of == so a
# missing EduLv can never produce an all-NA row, and to match how p1 and p2
# are built.
p3 <- p[p$EduLv %in% "普通大學", ]
#
# ObjOver model refitted on the p3 subset. Reuses the name lmn1, so the
# earlier fit is overwritten.
lmn1 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + ObjOver,
  data = p3
)
summary(lmn1)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + ObjOver, data = p3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8606 -0.2297  0.0024  0.1746  1.4490 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        10.39298    0.08886  116.96  < 2e-16 ***
## Gender男            0.09634    0.02620    3.68  0.00026 ***
## J_year              0.02696    0.00288    9.38  < 2e-16 ***
## Sector國外學校      0.29113    0.14654    1.99  0.04736 *  
## Sector國立(公立)  0.05768    0.02500    2.31  0.02134 *  
## Region北北基       -0.06319    0.06427   -0.98  0.32586    
## Region桃竹苗       -0.03988    0.07091   -0.56  0.57404    
## Region中彰投       -0.12833    0.06815   -1.88  0.06012 .  
## Region雲嘉南       -0.20235    0.06901   -2.93  0.00348 ** 
## Region高屏澎       -0.14350    0.06785   -2.11  0.03481 *  
## Hours               0.00267    0.00130    2.05  0.04065 *  
## ObjOverover        -0.07707    0.02554   -3.02  0.00265 ** 
## ObjOverunder        0.19290    0.05442    3.54  0.00042 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.321 on 676 degrees of freedom
## Multiple R-squared:  0.205,  Adjusted R-squared:  0.191 
## F-statistic: 14.5 on 12 and 676 DF,  p-value: <2e-16
#
# Core model refitted on the p3 subset. Reuses the name lmn2, so the earlier
# fit is overwritten.
lmn2 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours + Core,
  data = p3
)
summary(lmn2)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + Core, data = p3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8663 -0.2332 -0.0088  0.1737  1.5754 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        10.30022    0.08851  116.37  < 2e-16 ***
## Gender男            0.10452    0.02639    3.96  8.2e-05 ***
## J_year              0.02687    0.00290    9.25  < 2e-16 ***
## Sector國外學校      0.34506    0.14676    2.35  0.01900 *  
## Sector國立(公立)  0.05909    0.02496    2.37  0.01818 *  
## Region北北基       -0.04357    0.06471   -0.67  0.50099    
## Region桃竹苗       -0.03278    0.07128   -0.46  0.64571    
## Region中彰投       -0.10804    0.06852   -1.58  0.11532    
## Region雲嘉南       -0.19559    0.06932   -2.82  0.00492 ** 
## Region高屏澎       -0.12056    0.06815   -1.77  0.07733 .  
## Hours               0.00275    0.00131    2.10  0.03593 *  
## Core部分關聯        0.13188    0.03908    3.37  0.00078 ***
## Core核心關聯        0.11616    0.02988    3.89  0.00011 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.322 on 676 degrees of freedom
## Multiple R-squared:  0.198,  Adjusted R-squared:  0.183 
## F-statistic: 13.9 on 12 and 676 DF,  p-value: <2e-16
#
# Both self-rated measures (SubEduOver and SubMismatch) on the p3 subset.
# Reuses the name lmn3, so the earlier fit is overwritten.
lmn3 <- lm(
  log(Salary) ~ Gender + J_year + Sector + Region + Hours +
    SubEduOver + SubMismatch,
  data = p3
)
summary(lmn3)
## 
## Call:
## lm(formula = log(Salary) ~ Gender + J_year + Sector + Region + 
##     Hours + SubEduOver + SubMismatch, data = p3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7786 -0.2052 -0.0147  0.1705  1.5162 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            10.21838    0.09405  108.65  < 2e-16 ***
## Gender男                0.11493    0.02604    4.41  1.2e-05 ***
## J_year                  0.02660    0.00285    9.34  < 2e-16 ***
## Sector國外學校          0.38019    0.14415    2.64  0.00855 ** 
## Sector國立(公立)      0.04566    0.02449    1.86  0.06269 .  
## Region北北基           -0.05610    0.06351   -0.88  0.37739    
## Region桃竹苗           -0.02489    0.07013   -0.35  0.72278    
## Region中彰投           -0.11381    0.06732   -1.69  0.09135 .  
## Region雲嘉南           -0.18531    0.06805   -2.72  0.00663 ** 
## Region高屏澎           -0.11480    0.06695   -1.71  0.08687 .  
## Hours                   0.00336    0.00128    2.62  0.00893 ** 
## SubEduOver高於工作要求 -0.11033    0.03337   -3.31  0.00099 ***
## SubEduOver低於工作要求 -0.11087    0.04288   -2.59  0.00993 ** 
## SubMismatch             0.04522    0.01034    4.37  1.4e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.316 on 675 degrees of freedom
## Multiple R-squared:  0.228,  Adjusted R-squared:  0.214 
## F-statistic: 15.4 on 13 and 675 DF,  p-value: <2e-16