library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.6     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.9
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(gtsummary)
# Read the faculty data set; character columns are converted to factors.
# `header = TRUE` is spelled out in full: the abbreviated `h = T` depended on
# partial argument matching and on `T`, which can be reassigned.
dta <- read.csv("ncku_prof_V6.csv", header = TRUE, stringsAsFactors = TRUE)
# Preview the first rows to confirm the file parsed as expected.
head(dta)
##      ID Initial Citation H.id Gender Degree Rank College Dept Grads  FPY
## 1 10001     YCC      305    9      M      D    3     ENG  ESC     3 2013
## 2 10002     CYC      355   11      M      D    2     ENG  ESC    10 2008
## 3 10003     HBC     3452   10      M      D    1     ENG  ESC     0 2011
## 4 10004     HHC    15808   65      M      O    1     ENG  ESC    92 1997
## 5 10005     JSC      280   10      F      O    2     ENG  ESC    25 2011
## 6 10006     MYC     2506   22      M      D    2     ENG  ESC    41 2002
##   Articles StuApp Colprof
## 1       30    169     309
## 2       22    169     309
## 3       14    169     309
## 4      349    169     309
## 5       23    169     309
## 6       90    169     309
# Compact overview of every column: its type and leading values.
utils::str(dta)
## 'data.frame':    460 obs. of  14 variables:
##  $ ID      : int  10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 ...
##  $ Initial : Factor w/ 347 levels "BCT","BHC","BLC",..: 308 60 81 90 145 201 293 176 198 276 ...
##  $ Citation: int  305 355 3452 15808 280 2506 672 5735 1118 685 ...
##  $ H.id    : int  9 11 10 65 10 22 14 40 19 14 ...
##  $ Gender  : Factor w/ 2 levels "F","M": 2 2 2 2 1 2 2 2 2 2 ...
##  $ Degree  : Factor w/ 2 levels "D","O": 1 1 1 2 2 1 1 1 2 1 ...
##  $ Rank    : int  3 2 1 1 2 2 1 1 2 1 ...
##  $ College : Factor w/ 5 levels "ENG","LIB","MNG",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Dept    : Factor w/ 25 levels "ACC","BAD","CEN",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ Grads   : int  3 10 0 92 25 41 36 54 74 195 ...
##  $ FPY     : int  2013 2008 2011 1997 2011 2002 2008 2001 1994 1991 ...
##  $ Articles: int  30 22 14 349 23 90 36 123 26 70 ...
##  $ StuApp  : int  169 169 169 169 169 169 169 169 169 169 ...
##  $ Colprof : int  309 309 309 309 309 309 309 309 309 309 ...

Assessment 1

# Assessment 1: keep the rows with H.id above 12 and only the six columns
# needed afterwards, then show the last rows of the result.
adta <- dta %>%
  filter(H.id > 12) %>%
  select(
    all_of(c("H.id", "Gender", "College", "Rank", "Degree", "Grads"))
  )
tail(adta, n = 6)
##     H.id Gender College Rank Degree Grads
## 187   14      F     MNG    2      D    17
## 188   13      M     MNG    1      O    89
## 189   15      M     MNG    2      D    27
## 190   17      M     MNG    1      D    26
## 191   27      M     MNG    1      D    23
## 192   14      M     MNG    2      D     9

Assessment 2

# Assessment 2a: derive `academicy` as 2022 minus FPY.
# The preview rows are taken first; the mutate is row-wise, so the printed
# result is the same as computing on the full data and then calling head().
head(dta, n = 6) %>%
  mutate(academicy = 2022 - FPY) %>%
  select(H.id, Gender, Degree, Rank, Grads, academicy)
##   H.id Gender Degree Rank Grads academicy
## 1    9      M      D    3     3         9
## 2   11      M      D    2    10        14
## 3   10      M      D    1     0        11
## 4   65      M      O    1    92        25
## 5   10      F      O    2    25        11
## 6   22      M      D    2    41        20
# Assessment 2b: add `Grads_m`, the number of Grads per `academicy` year.
# NOTE(review): a row with FPY equal to 2022 would divide by zero here --
# confirm whether such rows exist in the data.
head(dta, n = 6) %>%
  mutate(academicy = 2022 - FPY) %>%
  mutate(Grads_m = Grads / academicy) %>%
  select(H.id, Gender, Degree, Rank, Grads, academicy, Grads_m)
##   H.id Gender Degree Rank Grads academicy   Grads_m
## 1    9      M      D    3     3         9 0.3333333
## 2   11      M      D    2    10        14 0.7142857
## 3   10      M      D    1     0        11 0.0000000
## 4   65      M      O    1    92        25 3.6800000
## 5   10      F      O    2    25        11 2.2727273
## 6   22      M      D    2    41        20 2.0500000

Assessment 3

# Assessment 3: descriptive statistics of H.id for every combination of
# College, Gender, Rank and Degree, ordered from highest to lowest mean.
#
# `na.rm = TRUE` is applied to every statistic (previously only to the mean),
# so a missing H.id can no longer yield a numeric mean next to NA for the
# other columns. `count` still reports the full group size, including any
# rows with a missing H.id.
# `.groups = "drop"` returns an ungrouped table and silences the
# "summarise() has grouped output" message; the row order is unaffected
# because arrange() ignores grouping by default.
dta %>%
  group_by(College, Gender, Rank, Degree) %>%
  summarize(
    mean_H.id = mean(H.id, na.rm = TRUE),
    sd_H.id = sd(H.id, na.rm = TRUE),
    v_H.id = var(H.id, na.rm = TRUE),
    max_H.id = max(H.id, na.rm = TRUE),
    min_H.id = min(H.id, na.rm = TRUE),
    count = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_H.id))
## `summarise()` has grouped output by 'College', 'Gender', 'Rank'. You can
## override using the `.groups` argument.
## # A tibble: 53 x 10
## # Groups:   College, Gender, Rank [30]
##    College Gender  Rank Degree mean_H.id sd_H.id v_H.id max_H.id min_H.id count
##    <fct>   <fct>  <int> <fct>      <dbl>   <dbl>  <dbl>    <int>    <int> <int>
##  1 ENG     F          1 D           34     10.4   108         46       28     3
##  2 ENG     M          1 D           24.4   11.0   121.        54        6    28
##  3 ENG     M          1 O           24.2   13.9   192.        92        3    76
##  4 SCI     M          1 D           21     16.1   258         39        6     4
##  5 ENG     F          1 O           19.5    9.71   94.3       32       10     4
##  6 SCI     M          1 O           18.8   15.2   231.        58        3    24
##  7 SCI     F          1 O           18.2   14.4   206.        34        3     5
##  8 ENG     M          2 D           17.3    6.81   46.4       40       10    20
##  9 MNG     M          1 D           16      6.48   42         27        8     6
## 10 MNG     F          1 O           15.2    8.54   72.9       27        8     4
## # ... with 43 more rows

#H.id平均數最高的群組特質為ENG的College,gender為女性,degree為國內,Rank顯示為教授

#H.id平均數最低為零,皆為LIB的College且為國內degree,gender有男有女,多為副教授或助理教授

#「工學院男教授的平均學術產能不及工學院女教授」此論述不恰當,因若就教授人數來看,H.id平均數排名第一雖是女性,但人數為3人;而H.id平均數排名第二的男性群組人數為28人,因此該群組的H.id平均數也是相對較穩定的。

#在此表中顯示文學院教授的學術產能在五個學院中皆排在後半端,可能是由於不同領域間呈現學術成果的方式不同,因此在表中,主要非以學術刊物發表的文學院,會被排序到後端。

# Tabulate Gender, Degree and Rank as counts and percentages, with one
# column per College.
tbl_summary(
  data = select(dta, College, Gender, Degree, Rank),
  by = College
)
## Warning: The `fmt_missing()` function is deprecated and will soon be removed
## * Use the `sub_missing()` function instead
Characteristic ENG, N = 184¹ LIB, N = 63¹ MNG, N = 84¹ SCI, N = 72¹ SSC, N = 57¹
Gender
F 17 (9.2%) 34 (54%) 24 (29%) 14 (19%) 22 (39%)
M 167 (91%) 29 (46%) 60 (71%) 58 (81%) 35 (61%)
Degree
D 63 (34%) 21 (33%) 25 (30%) 16 (22%) 13 (23%)
O 121 (66%) 42 (67%) 59 (70%) 56 (78%) 44 (77%)
Rank
1 111 (60%) 29 (46%) 36 (43%) 36 (50%) 28 (49%)
2 44 (24%) 29 (46%) 27 (32%) 27 (38%) 22 (39%)
3 29 (16%) 5 (7.9%) 21 (25%) 9 (12%) 7 (12%)
¹ n (%)

#Gender部份看到,除了文學院的男女性別占比較平均外,在其他學院中都以男性為多數

#Degree部份看到,所有學院都是以海外學位的占比較高

#Rank部份看到,多數學院都是以教授居多,其中工學院的教授占比最高(60%);唯文學院的教授與副教授占比相同(各46%)