議題:

  1. UNESCO中,選定國家之學士、碩士、博士總數;
  2. 其學士、碩士、博士性別比(女/男);
  3. 其學士與碩士博士比率;
  4. 其STEM就讀比率。

Questions:

  1. Enrolment by level in each reference country (ISCED 6, 7, 8);
  2. Gender Parity Index (GPI, Female/Male);
  3. Bachelor ratio in ISCED 6, 7, 8 (Bachelor/(Master+Doctoral));
  4. STEM ratio in Tertiary Education.

Glossary:

Reference Countries:


Demo file & UNESCO:


R script, comments and plots:

library("stringr")
library("purrr")
library("ggplot2")
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Transform the UNESCO raw CSV file into a data frame, then answer Q1, Q2, Q3 and Q4.

# Load the raw UNESCO extract.
# NOTE(review): this absolute path only resolves on the original author's
# machine -- confirm the location before running elsewhere.
GPI <- read.csv("/Users/wh/Desktop/GPI_STEM.csv")
# Indicator is split on "_": the first piece is stored as Degree and the
# second as Gender. The split is done once and reused, and TRUE is spelled
# out because `T` is an ordinary variable that can be reassigned.
indicator_parts <- str_split(GPI$Indicator, "_", simplify = TRUE)
GPI$Degree <- indicator_parts[, 1]
GPI$Gender <- indicator_parts[, 2]
# Quick look at the categories produced by the split.
table(GPI$Degree)
table(GPI$Gender)

Q1. Enrolment by level in each reference country (ISCED 6, 7, 8):

# Keep only the rows whose Degree is Bachelor, Doctoral or Master.
GPI2 <- filter(GPI, Degree %in% c("Bachelor", "Doctoral", "Master"))
# Sum Value within every LOCATION / Degree / Year combination.
tbl1 <- summarise(
  group_by(GPI2, LOCATION, Degree, Year),
  HeadCount = sum(Value)
)
## `summarise()` regrouping output by 'LOCATION', 'Degree' (override with `.groups` argument)
head(tbl1)
View(tbl1)
## Warning in system2("/usr/bin/otool", c("-L", shQuote(DSO)), stdout = TRUE): 執行
## 中命令 ''/usr/bin/otool' -L '/Library/Frameworks/R.framework/Resources/modules/
## R_de.so'' 已有狀態 1
Plot: Bachelor by countries
## Plot: Bachelor by countries
# Subset the Bachelor rows, then draw one LOCATION label per (Year, HeadCount)
# point, coloured by LOCATION. The plot is a separate statement, not part of
# the pipe.
ISCED6 <- filter(tbl1, Degree == "Bachelor")
ggplot(
  data = ISCED6,
  mapping = aes(x = Year, y = HeadCount, group = LOCATION, color = LOCATION)
) +
  geom_label(mapping = aes(label = LOCATION)) +
  ylab("Numbers of Bachelor")

Plot: Master by countries
## Plot: Master by countries
# Subset the Master rows, then draw one LOCATION label per (Year, HeadCount)
# point, coloured by LOCATION. The plot is a separate statement, not part of
# the pipe.
ISCED7 <- filter(tbl1, Degree == "Master")
ggplot(
  data = ISCED7,
  mapping = aes(x = Year, y = HeadCount, group = LOCATION, color = LOCATION)
) +
  geom_label(mapping = aes(label = LOCATION)) +
  ylab("Numbers of Master")

Plot: Doctoral by countries
## Plot: Doctoral by countries
# Subset the Doctoral rows, then draw one LOCATION label per (Year, HeadCount)
# point, coloured by LOCATION. The plot is a separate statement, not part of
# the pipe.
ISCED8 <- filter(tbl1, Degree == "Doctoral")
ggplot(
  data = ISCED8,
  mapping = aes(x = Year, y = HeadCount, group = LOCATION, color = LOCATION)
) +
  geom_label(mapping = aes(label = LOCATION)) +
  ylab("Numbers of Doctoral")

####

## `summarise()` regrouping output by 'LOCATION', 'Year', 'Degree' (override with `.groups` argument)
## # A tibble: 6 x 5
## # Groups:   LOCATION, Year, Degree [3]
##   LOCATION  Year Degree   Gender HeadCount
##   <chr>    <int> <chr>    <chr>      <dbl>
## 1 AUS       1998 Doctoral Female     11195
## 2 AUS       1998 Doctoral Male       13983
## 3 AUS       1999 Doctoral Female     12115
## 4 AUS       1999 Doctoral Male       14270
## 5 AUS       2000 Doctoral Female     13016
## 6 AUS       2000 Doctoral Male       14599

Q2. Gender Parity Index (GPI, gender ratio):

## Head count of each degree by country, year & gender
# Sum Value within every LOCATION / Year / Degree / Gender combination.
temp_df <- summarise(
  group_by(GPI2, LOCATION, Year, Degree, Gender),
  HeadCount = sum(Value)
)
## `summarise()` regrouping output by 'LOCATION', 'Year', 'Degree' (override with `.groups` argument)
head(temp_df)
## # A tibble: 6 x 5
## # Groups:   LOCATION, Year, Degree [3]
##   LOCATION  Year Degree   Gender HeadCount
##   <chr>    <int> <chr>    <chr>      <dbl>
## 1 AUS       1998 Doctoral Female     11195
## 2 AUS       1998 Doctoral Male       13983
## 3 AUS       1999 Doctoral Female     12115
## 4 AUS       1999 Doctoral Male       14270
## 5 AUS       2000 Doctoral Female     13016
## 6 AUS       2000 Doctoral Male       14599
## GPI per country, year and degree: female head count / male head count.
# The two genders are picked by name rather than by row position. The earlier
# first()/last() form depended on row order and returned exactly 1 whenever a
# group held a single gender row. Groups missing either gender now yield NA
# instead of a misleading ratio.
tbl2 <- temp_df %>%
  group_by(LOCATION, Year, Degree) %>%
  summarise(
    GenderRatio = if (all(c("Female", "Male") %in% Gender)) {
      sum(HeadCount[Gender %in% "Female"]) / sum(HeadCount[Gender %in% "Male"])
    } else {
      NA_real_
    }
  )
## `summarise()` regrouping output by 'LOCATION', 'Year' (override with `.groups` argument)
head(tbl2)
## # A tibble: 6 x 4
## # Groups:   LOCATION, Year [6]
##   LOCATION  Year Degree   GenderRatio
##   <chr>    <int> <chr>          <dbl>
## 1 AUS       1998 Doctoral       0.801
## 2 AUS       1999 Doctoral       0.849
## 3 AUS       2000 Doctoral       0.892
## 4 AUS       2001 Doctoral       0.918
## 5 AUS       2002 Doctoral       0.940
## 6 AUS       2003 Doctoral       0.961
# Rebuild temp_df without the gender split: Value summed per
# LOCATION / Degree / Year. This overwrites the by-gender table of the same
# name.
temp_df <- summarise(
  group_by(GPI2, LOCATION, Degree, Year),
  HeadCount = sum(Value)
)
## `summarise()` regrouping output by 'LOCATION', 'Degree' (override with `.groups` argument)
head(temp_df)
## # A tibble: 6 x 4
## # Groups:   LOCATION, Degree [1]
##   LOCATION Degree    Year HeadCount
##   <chr>    <chr>    <int>     <dbl>
## 1 AUS      Bachelor  2013    905913
## 2 AUS      Bachelor  2014    930675
## 3 AUS      Bachelor  2015    952578
## 4 AUS      Bachelor  2016    977963
## 5 AUS      Bachelor  2017    981844
## 6 AUS      Bachelor  2018    999034
Plot: GPI in Bachelor
# Bachelor rows of the gender-ratio table, drawn as one LOCATION label per
# (Year, GenderRatio) point, coloured by LOCATION.
Bachelor_GPI <- filter(tbl2, Degree == "Bachelor")
ggplot(
  data = Bachelor_GPI,
  mapping = aes(x = Year, y = GenderRatio, group = LOCATION, color = LOCATION)
) +
  geom_label(mapping = aes(label = LOCATION)) +
  ylab("GPI in Bachelor")

Plot: GPI in Master
# Master rows of the gender-ratio table, drawn as one LOCATION label per
# (Year, GenderRatio) point, coloured by LOCATION.
Master_GPI <- filter(tbl2, Degree == "Master")
ggplot(
  data = Master_GPI,
  mapping = aes(x = Year, y = GenderRatio, group = LOCATION, color = LOCATION)
) +
  geom_label(mapping = aes(label = LOCATION)) +
  ylab("GPI in Master")

Plot: GPI in Doctoral
# Doctoral rows of the gender-ratio table, drawn as one LOCATION label per
# (Year, GenderRatio) point, coloured by LOCATION.
Doctoral_GPI <- filter(tbl2, Degree == "Doctoral")
ggplot(
  data = Doctoral_GPI,
  mapping = aes(x = Year, y = GenderRatio, group = LOCATION, color = LOCATION)
) +
  geom_label(mapping = aes(label = LOCATION)) +
  ylab("GPI in Doctoral")


Q3. Bachelor ratio in ISCED 6, 7, 8 (Bachelor/(Master+Doctoral)):

# Bachelor head count divided by the combined Master + Doctoral head count,
# per country and year.
# Degrees are selected by name. Taking first(HeadCount) only gives Bachelor
# when a Bachelor row exists; for a country-year with no Bachelor row it would
# report another degree as the numerator. Such country-years are dropped
# before the ratio is computed. A country-year with Bachelor but no
# Master/Doctoral rows still yields Inf (division by zero).
tbl3 <- temp_df %>%
  group_by(LOCATION, Year) %>%
  filter(any(Degree == "Bachelor")) %>%
  summarise(
    StudentRatio = sum(HeadCount[Degree %in% "Bachelor"]) /
      sum(HeadCount[Degree %in% c("Master", "Doctoral")])
  )
## `summarise()` regrouping output by 'LOCATION' (override with `.groups` argument)
head(tbl3)
## # A tibble: 6 x 3
## # Groups:   LOCATION [1]
##   LOCATION  Year StudentRatio
##   <chr>    <int>        <dbl>
## 1 AUS       1998          Inf
## 2 AUS       1999          Inf
## 3 AUS       2000          Inf
## 4 AUS       2001          Inf
## 5 AUS       2002          Inf
## 6 AUS       2003          Inf
# Drop the rows whose ratio is infinite (zero denominator).
tbl3 <- filter(tbl3, !is.infinite(StudentRatio))
Plot: Bachelor ratio in ISCED 6, 7, 8 (Bachelor/(Master+Doctoral))
# Plot the ratio table as one LOCATION label per (Year, StudentRatio) point,
# coloured by LOCATION.
ISCED6ratio <- tbl3
ggplot(
  data = ISCED6ratio,
  mapping = aes(x = Year, y = StudentRatio, group = LOCATION, color = LOCATION)
) +
  geom_label(mapping = aes(label = LOCATION)) +
  ylab("ISCED-6 Ratio")


Q4. STEM field ratio in Tertiary Education:

# Rows whose Degree piece of the indicator is "STEM" or "nonSTEM".
tbl4 <- filter(GPI, Degree %in% c("STEM", "nonSTEM"))
View(tbl4)
## Warning in system2("/usr/bin/otool", c("-L", shQuote(DSO)), stdout = TRUE): 執行
## 中命令 ''/usr/bin/otool' -L '/Library/Frameworks/R.framework/Resources/modules/
## R_de.so'' 已有狀態 1
# STEM rows only, drawn as one LOCATION label per (Year, Value) point,
# coloured by LOCATION.
STEM_ratio <- filter(tbl4, Degree == "STEM")
ggplot(
  data = STEM_ratio,
  mapping = aes(x = Year, y = Value, group = LOCATION, color = LOCATION)
) +
  geom_label(mapping = aes(label = LOCATION)) +
  ylab("S.T.E.M. Ratio (%)")