library(tidyverse)
library(gtsummary)
匯入資料並命名
# Read ncku_prof_V6.csv from the working directory into `dta`.
# The first line of the file supplies the column names, and character
# columns are converted to factors.
dta <- read.csv("ncku_prof_V6.csv", header = TRUE, stringsAsFactors = TRUE)
查看前六筆資料
# Preview the first six rows of `dta` (six is head()'s default, stated here).
head(dta, n = 6L)
## ID Initial Citation H.id Gender Degree Rank College Dept Grads FPY
## 1 10001 YCC 305 9 M D 3 ENG ESC 3 2013
## 2 10002 CYC 355 11 M D 2 ENG ESC 10 2008
## 3 10003 HBC 3452 10 M D 1 ENG ESC 0 2011
## 4 10004 HHC 15808 65 M O 1 ENG ESC 92 1997
## 5 10005 JSC 280 10 F O 2 ENG ESC 25 2011
## 6 10006 MYC 2506 22 M D 2 ENG ESC 41 2002
## Articles StuApp Colprof
## 1 30 169 309
## 2 22 169 309
## 3 14 169 309
## 4 349 169 309
## 5 23 169 309
## 6 90 169 309
檢視資料結構
# Print a compact overview of `dta`: dimensions plus each column's type
# and leading values.
utils::str(object = dta)
## 'data.frame': 460 obs. of 14 variables:
## $ ID : int 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 ...
## $ Initial : Factor w/ 347 levels "BCT","BHC","BLC",..: 308 60 81 90 145 201 293 176 198 276 ...
## $ Citation: int 305 355 3452 15808 280 2506 672 5735 1118 685 ...
## $ H.id : int 9 11 10 65 10 22 14 40 19 14 ...
## $ Gender : Factor w/ 2 levels "F","M": 2 2 2 2 1 2 2 2 2 2 ...
## $ Degree : Factor w/ 2 levels "D","O": 1 1 1 2 2 1 1 1 2 1 ...
## $ Rank : int 3 2 1 1 2 2 1 1 2 1 ...
## $ College : Factor w/ 5 levels "ENG","LIB","MNG",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Dept : Factor w/ 25 levels "ACC","BAD","CEN",..: 10 10 10 10 10 10 10 10 10 10 ...
## $ Grads : int 3 10 0 92 25 41 36 54 74 195 ...
## $ FPY : int 2013 2008 2011 1997 2011 2002 2008 2001 1994 1991 ...
## $ Articles: int 30 22 14 349 23 90 36 123 26 70 ...
## $ StuApp : int 169 169 169 169 169 169 169 169 169 169 ...
## $ Colprof : int 309 309 309 309 309 309 309 309 309 309 ...
Assessment 1
# Build `bdta`: the six columns of interest, restricted to rows whose H.id
# is greater than 12 (rows with a missing H.id are dropped by filter()).
bdta <- dta |>
  select(H.id, Gender, College, Rank, Degree, Grads) |>
  filter(H.id > 12)
查看最後六筆資料
# Show the last six rows of `bdta` (six is tail()'s default, stated here).
tail(bdta, n = 6L)
## H.id Gender College Rank Degree Grads
## 187 14 F MNG 2 D 17
## 188 13 M MNG 1 O 89
## 189 15 M MNG 2 D 27
## 190 17 M MNG 1 D 26
## 191 27 M MNG 1 D 23
## 192 14 M MNG 2 D 9
Assessment 2
# Build `newdta` from the rows of `dta` that have a non-missing FPY, adding:
#   academicy - 2022 minus FPY
#   Grads_m   - Grads divided by academicy
# NOTE(review): a row with FPY equal to 2022 would give academicy = 0 and
# therefore a non-finite Grads_m; confirm no such rows exist.
newdta <- dta |>
  filter(!is.na(FPY)) |>
  mutate(
    academicy = 2022 - FPY,
    Grads_m = Grads / academicy
  ) |>
  select(H.id, Gender, Degree, Rank, College, Grads, academicy, Grads_m)
檢視前六筆資料
# Preview the first six rows of `newdta` (six is head()'s default).
head(newdta, n = 6L)
## H.id Gender Degree Rank College Grads academicy Grads_m
## 1 9 M D 3 ENG 3 9 0.3333333
## 2 11 M D 2 ENG 10 14 0.7142857
## 3 10 M D 1 ENG 0 11 0.0000000
## 4 65 M O 1 ENG 92 25 3.6800000
## 5 10 F O 2 ENG 25 11 2.2727273
## 6 22 M D 2 ENG 41 20 2.0500000
Assessment 3
遞減排序
# Summarise H.id within every College x Gender x Rank x Degree group and
# list the groups from the highest mean H.id to the lowest.
# Every statistic skips missing H.id values, so all of them describe the
# same observations as the mean. `count` is the number of rows in the group.
# sd and var are NA for a group that contains a single observation.
dta %>%
  group_by(College, Gender, Rank, Degree) %>%
  summarize(
    mean_H.id = mean(H.id, na.rm = TRUE),
    sd_H.id = sd(H.id, na.rm = TRUE),
    v_H.id = var(H.id, na.rm = TRUE),
    max_H.id = max(H.id, na.rm = TRUE),
    min_H.id = min(H.id, na.rm = TRUE),
    count = n(),
    # Same grouping as the default; stated explicitly to silence the message.
    .groups = "drop_last"
  ) %>%
  arrange(desc(mean_H.id))
## # A tibble: 53 × 10
## # Groups: College, Gender, Rank [30]
## College Gender Rank Degree mean_H.id sd_H.id v_H.id max_H.id min_H.id count
## <fct> <fct> <int> <fct> <dbl> <dbl> <dbl> <int> <int> <int>
## 1 ENG F 1 D 34 10.4 108 46 28 3
## 2 ENG M 1 D 24.4 11.0 121. 54 6 28
## 3 ENG M 1 O 24.2 13.9 192. 92 3 76
## 4 SCI M 1 D 21 16.1 258 39 6 4
## 5 ENG F 1 O 19.5 9.71 94.3 32 10 4
## 6 SCI M 1 O 18.8 15.2 231. 58 3 24
## 7 SCI F 1 O 18.2 14.4 206. 34 3 5
## 8 ENG M 2 D 17.3 6.81 46.4 40 10 20
## 9 MNG M 1 D 16 6.48 42 27 8 6
## 10 MNG F 1 O 15.2 8.54 72.9 27 8 4
## # … with 43 more rows
遞增排序
# Summarise H.id within every College x Gender x Rank x Degree group and
# list the groups from the lowest mean H.id to the highest.
# Every statistic skips missing H.id values, so all of them describe the
# same observations as the mean. `count` is the number of rows in the group.
# sd and var are NA for a group that contains a single observation.
dta %>%
  group_by(College, Gender, Rank, Degree) %>%
  summarize(
    mean_H.id = mean(H.id, na.rm = TRUE),
    sd_H.id = sd(H.id, na.rm = TRUE),
    v_H.id = var(H.id, na.rm = TRUE),
    max_H.id = max(H.id, na.rm = TRUE),
    min_H.id = min(H.id, na.rm = TRUE),
    count = n(),
    # Same grouping as the default; stated explicitly to silence the message.
    .groups = "drop_last"
  ) %>%
  arrange(mean_H.id)
## # A tibble: 53 × 10
## # Groups: College, Gender, Rank [30]
## College Gender Rank Degree mean_H.id sd_H.id v_H.id max_H.id min_H.id count
## <fct> <fct> <int> <fct> <dbl> <dbl> <dbl> <int> <int> <int>
## 1 LIB F 2 D 0 0 0 0 0 6
## 2 LIB F 3 D 0 0 0 0 0 2
## 3 LIB M 1 D 0 0 0 0 0 4
## 4 LIB M 3 D 0 NA NA 0 0 1
## 5 LIB M 2 D 0.167 0.408 0.167 1 0 6
## 6 LIB F 3 O 0.5 0.707 0.5 1 0 2
## 7 LIB M 2 O 0.727 1.19 1.42 3 0 11
## 8 LIB F 2 O 0.833 0.983 0.967 2 0 6
## 9 SSC F 3 D 1 NA NA 1 1 1
## 10 LIB F 1 D 1.5 0.707 0.5 2 1 2
## # … with 43 more rows
3-2
1. H.id最高: 工學院/女性/教授/本土博士; 最低:
文學院/女性/副教授/本土博士(另有三個文學院本土博士群組的平均同為 0)。
2.
此論述不恰當,因為雖然工學院/女性/教授/本土博士群組的平均 H.id 最高,但該群組只有三人,因此不能過度推論。
3.
5個學院中,文學院教授的學術產能最低,且排名最低的四個群組平均 H.id 皆為 0,完全沒有學術產能。
Assessment 4
# Summary table of Gender, Degree and Rank, with one column per College.
dta |>
  tbl_summary(
    by = College,
    include = c(Gender, Degree, Rank)
  )
| Characteristic |
ENG, N = 184 |
LIB, N = 63 |
MNG, N = 84 |
SCI, N = 72 |
SSC, N = 57 |
| Gender |
|
|
|
|
|
| F |
17 (9.2%) |
34 (54%) |
24 (29%) |
14 (19%) |
22 (39%) |
| M |
167 (91%) |
29 (46%) |
60 (71%) |
58 (81%) |
35 (61%) |
| Degree |
|
|
|
|
|
| D |
63 (34%) |
21 (33%) |
25 (30%) |
16 (22%) |
13 (23%) |
| O |
121 (66%) |
42 (67%) |
59 (70%) |
56 (78%) |
44 (77%) |
| Rank |
|
|
|
|
|
| 1 |
111 (60%) |
29 (46%) |
36 (43%) |
36 (50%) |
28 (49%) |
| 2 |
44 (24%) |
29 (46%) |
27 (32%) |
27 (38%) |
22 (39%) |
| 3 |
29 (16%) |
5 (7.9%) |
21 (25%) |
9 (12%) |
7 (12%) |
結論
5個學院中,除文學院外,其餘4個學院男教授的比例皆大於女性,其中工學院的比例差距最大;
文學院則是女性比例(54%)略高於男性(46%),差距最小。
5個學院中,教授取得留學(國外)博士學位的比例皆高於本國博士,其中理學院的比例差距最大;
工學院最小。
5個學院的教授職等人數/比例依序為: 教授 ≥ 副教授 >
助理教授(文學院的教授與副教授人數相同,皆為29人,其餘學院皆為教授最多)。