library("stringr")
library("purrr")
library("ggplot2")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Load the indicator table. The location defaults to the original hard-coded
## path and can be overridden with the GPI_STEM_CSV environment variable so the
## script is not tied to one machine.
gpi_csv_path <- Sys.getenv(
  "GPI_STEM_CSV",
  unset = "/Users/wh/Desktop/GPI_STEM.csv"
)
GPI <- read.csv(gpi_csv_path)

## Indicator is split on "_": the first token is stored as Degree, the second
## as Gender. Split once and reuse the resulting character matrix.
indicator_parts <- str_split(GPI$Indicator, "_", simplify = TRUE)
GPI$Degree <- indicator_parts[, 1]
GPI$Gender <- indicator_parts[, 2]
table(GPI$Degree)
table(GPI$Gender)

## Keep only the three degree levels used by the headcount and ratio tables.
GPI2 <- GPI %>% filter(Degree %in% c("Bachelor", "Doctoral", "Master"))

## Sum of Value (stored as HeadCount) per country, degree and year.
## `.groups = "drop_last"` is the regrouping summarise() applies by default
## (result grouped by LOCATION, Degree); stating it silences the message.
tbl1 <- GPI2 %>%
  group_by(LOCATION, Degree, Year) %>%
  summarise(HeadCount = sum(Value), .groups = "drop_last")
head(tbl1)

## View() opens a data viewer, so only call it in an interactive session.
## The rendered output previously recorded a system2()/otool warning right
## after this call.
if (interactive()) {
  View(tbl1)
}
## Headcount plots, one per degree level. Each subset of tbl1 is drawn with
## Year on the x axis, HeadCount on the y axis, and every observation rendered
## as a label showing its LOCATION code (coloured by LOCATION).

## Bachelor headcount by country
ISCED6 <- filter(tbl1, Degree == "Bachelor")
ggplot(ISCED6, aes(Year, HeadCount, group = LOCATION, colour = LOCATION)) +
  geom_label(aes(label = LOCATION)) +
  labs(y = "Numbers of Bachelor")

## Master headcount by country
ISCED7 <- filter(tbl1, Degree == "Master")
ggplot(ISCED7, aes(Year, HeadCount, group = LOCATION, colour = LOCATION)) +
  geom_label(aes(label = LOCATION)) +
  labs(y = "Numbers of Master")

## Doctoral headcount by country
ISCED8 <- filter(tbl1, Degree == "Doctoral")
ggplot(ISCED8, aes(Year, HeadCount, group = LOCATION, colour = LOCATION)) +
  geom_label(aes(label = LOCATION)) +
  labs(y = "Numbers of Doctoral")
####
## `summarise()` regrouping output by 'LOCATION', 'Year', 'Degree' (override with `.groups` argument)
## # A tibble: 6 x 5
## # Groups: LOCATION, Year, Degree [3]
## LOCATION Year Degree Gender HeadCount
## <chr> <int> <chr> <chr> <dbl>
## 1 AUS 1998 Doctoral Female 11195
## 2 AUS 1998 Doctoral Male 13983
## 3 AUS 1999 Doctoral Female 12115
## 4 AUS 1999 Doctoral Male 14270
## 5 AUS 2000 Doctoral Female 13016
## 6 AUS 2000 Doctoral Male 14599
## Headcount (sum of Value) per country, year, degree and gender.
## `.groups = "drop_last"` spells out the default regrouping: the result stays
## grouped by LOCATION, Year, Degree, and summarise() no longer prints its
## "regrouping output" message.
temp_df <- GPI2 %>%
  group_by(LOCATION, Year, Degree, Gender) %>%
  summarise(HeadCount = sum(Value), .groups = "drop_last")
head(temp_df)
## # A tibble: 6 x 5
## # Groups: LOCATION, Year, Degree [3]
## LOCATION Year Degree Gender HeadCount
## <chr> <int> <chr> <chr> <dbl>
## 1 AUS 1998 Doctoral Female 11195
## 2 AUS 1998 Doctoral Male 13983
## 3 AUS 1999 Doctoral Female 12115
## 4 AUS 1999 Doctoral Male 14270
## 5 AUS 2000 Doctoral Female 13016
## 6 AUS 2000 Doctoral Male 14599
## Gender ratio (Female headcount / Male headcount) per country, year, degree.
## The two headcounts are picked by their Gender label rather than by row
## position: with first()/last(), a group containing a single gender divided
## that value by itself and reported a ratio of exactly 1. Groups that do not
## contain both "Female" and "Male" now get NA so the gap stays visible.
tbl2 <- temp_df %>%
  group_by(LOCATION, Year, Degree) %>%
  summarise(
    GenderRatio = if (all(c("Female", "Male") %in% Gender)) {
      sum(HeadCount[Gender %in% "Female"]) /
        sum(HeadCount[Gender %in% "Male"])
    } else {
      NA_real_
    },
    # Same grouping as the summarise() default (LOCATION, Year), stated
    # explicitly to avoid the regrouping message.
    .groups = "drop_last"
  )
head(tbl2)
## # A tibble: 6 x 4
## # Groups: LOCATION, Year [6]
## LOCATION Year Degree GenderRatio
## <chr> <int> <chr> <dbl>
## 1 AUS 1998 Doctoral 0.801
## 2 AUS 1999 Doctoral 0.849
## 3 AUS 2000 Doctoral 0.892
## 4 AUS 2001 Doctoral 0.918
## 5 AUS 2002 Doctoral 0.940
## 6 AUS 2003 Doctoral 0.961
## Rebuild temp_df without the Gender split: headcount (sum of Value) per
## country, degree and year. NOTE: this overwrites the per-gender temp_df
## created earlier, so from here on temp_df has no Gender column.
## `.groups = "drop_last"` keeps the default grouping (LOCATION, Degree) and
## silences the summarise() regrouping message.
temp_df <- GPI2 %>%
  group_by(LOCATION, Degree, Year) %>%
  summarise(HeadCount = sum(Value), .groups = "drop_last")
head(temp_df)
## # A tibble: 6 x 4
## # Groups: LOCATION, Degree [1]
## LOCATION Degree Year HeadCount
## <chr> <chr> <int> <dbl>
## 1 AUS Bachelor 2013 905913
## 2 AUS Bachelor 2014 930675
## 3 AUS Bachelor 2015 952578
## 4 AUS Bachelor 2016 977963
## 5 AUS Bachelor 2017 981844
## 6 AUS Bachelor 2018 999034
## Gender-ratio plots, one per degree level. Each subset of tbl2 is drawn with
## Year on the x axis, GenderRatio on the y axis, and every observation
## rendered as a label showing its LOCATION code (coloured by LOCATION).

## Bachelor
Bachelor_GPI <- filter(tbl2, Degree == "Bachelor")
ggplot(
  Bachelor_GPI,
  aes(Year, GenderRatio, group = LOCATION, colour = LOCATION)
) +
  geom_label(aes(label = LOCATION)) +
  labs(y = "GPI in Bachelor")

## Master
Master_GPI <- filter(tbl2, Degree == "Master")
ggplot(
  Master_GPI,
  aes(Year, GenderRatio, group = LOCATION, colour = LOCATION)
) +
  geom_label(aes(label = LOCATION)) +
  labs(y = "GPI in Master")

## Doctoral
Doctoral_GPI <- filter(tbl2, Degree == "Doctoral")
ggplot(
  Doctoral_GPI,
  aes(Year, GenderRatio, group = LOCATION, colour = LOCATION)
) +
  geom_label(aes(label = LOCATION)) +
  labs(y = "GPI in Doctoral")
## Ratio of Bachelor headcount to the combined headcount of the other degree
## levels, per country and year.
## The Bachelor row is selected by its Degree label. The previous
## first(HeadCount) took whichever degree happened to come first, so a
## country-year without Bachelor data but with both Doctoral and Master rows
## produced a Doctoral/Master ratio that was then plotted as the ISCED-6 ratio.
## A ratio is only defined when the group has a Bachelor row AND at least one
## other degree; otherwise it is NA.
tbl3 <- temp_df %>%
  group_by(LOCATION, Year) %>%
  summarise(
    StudentRatio = if (any(Degree %in% "Bachelor") &&
      any(!Degree %in% "Bachelor")) {
      sum(HeadCount[Degree %in% "Bachelor"]) /
        sum(HeadCount[!Degree %in% "Bachelor"])
    } else {
      NA_real_
    },
    # Same grouping as the summarise() default (LOCATION), stated explicitly
    # to avoid the regrouping message.
    .groups = "drop_last"
  )
head(tbl3)

## Drop country-years without a usable ratio: NA (one side missing) as well as
## Inf/NaN (the other degrees sum to zero).
tbl3 <- tbl3 %>% filter(is.finite(StudentRatio))
ISCED6ratio <- tbl3
ggplot(
  data = ISCED6ratio,
  aes(x = Year, y = StudentRatio, group = LOCATION, color = LOCATION)
) +
  geom_label(aes(label = LOCATION)) +
  ylab("ISCED-6 Ratio")
## Rows of the full table whose Degree token (first part of Indicator) is
## "STEM" or "nonSTEM".
tbl4 <- GPI %>%
  filter(Degree %in% c("STEM", "nonSTEM"))

## View() opens a data viewer, so only call it in an interactive session.
## The rendered output previously recorded a system2()/otool warning right
## after this call.
if (interactive()) {
  View(tbl4)
}

## Plot the STEM rows only: Value over Year, each observation drawn as a label
## showing its LOCATION code (coloured by LOCATION).
STEM_ratio <- tbl4 %>%
  filter(Degree %in% c("STEM"))
ggplot(
  data = STEM_ratio,
  aes(x = Year, y = Value, group = LOCATION, color = LOCATION)
) +
  geom_label(aes(label = LOCATION)) +
  ylab("S.T.E.M. Ratio (%)")