Setup and data loading

source(here::here("src/common_basis.R"), local = knitr::knit_global())
here() starts at /Users/jiemakel/tyo/teaching-analysis
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.0      ✔ stringr 1.4.1 
✔ readr   2.1.2      ✔ forcats 0.5.2 ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
* Automatic snapshot has updated '~/tyo/teaching-analysis/renv.lock'.

Person work year data

group_costs <- tribble(
  ~subgroup,~cost,
  "4. porras / TR Staff Level 4",6500,
  "3. porras / TR Staff Level 3",5000,
  "2. porras / TR Staff Level 2",3500,
  "1. porras / TR Staff Level 1",2500,
  "Opetuksen ja tutkimuksen tukihenkilöstö / TR Support Staff",3000
)


pwy_in <- read_csv(here("data/input/LTS_henkilo_tehtavajaottelu_henkilostoryhma-htv.csv"),locale=locale(decimal_mark=','),col_types=cols(...35='c',...36='c',`Henkilöstöryhmä / Staff Group`='c',`Henkilöstöalaryhmä / Staff Subgroup`='c',`Vuosi / Year`='i',.default="d"))
New names:
pwy <- pwy_in %>%
  rename(year=`Vuosi / Year`,group=`Henkilöstöryhmä / Staff Group`,subgroup=`Henkilöstöalaryhmä / Staff Subgroup`,unit=...35,subunit=...36) %>%
  select(year,2:15,group,subgroup,unit,subunit) %>%
  pivot_longer(2:15,values_to="pwy") %>%
  filter(!is.na(pwy)) %>%
  select(-name) %>%
  distinct() %>%
  left_join(group_costs) %>%
#  replace_na(list(cost=2500)) %>%
  mutate(cost=pwy*cost)
Joining, by = "subgroup"
pwy

OKM gains data

gains <- read_csv(here("data/input/Rahoitus vuodelle 2022 suoritteen laskennallinen tuotto.csv"), locale=locale(decimal_mark=',')) %>%
  rename(gain=`Suoritteen laskennallinen tuotto vuoden 2022 rahoituksessa / Value of an output in 2022 funding`,okm_weight=`Kriteerin painoarvo OKM-rahoitusmallissa / Share of the criteria in the MinEdu core funding modell`,type=Suorite,subtype=Nimi) %>%
  distinct(type,subtype,okm_weight,gain) %>%
  mutate(type=case_when(
    type == "2 Tutkintopiste, alempi korkeakoulututkinto / Degree point, Bachelor's degree" ~ "Bachelor's",
    type == "1 Tutkintopiste, ylempi korkeakoulututkinto / Degree point, Master's degree" ~ "Master's",
    T ~ type
  ))
Rows: 163 Columns: 9── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Suorite, Nimi
dbl (7): Kriteerin painoarvo OKM-rahoitusmallissa / Share of the criteria in the MinEdu core funding modell, HY:n rahoitus kriteereillä vuoden 2022 rahoituksesta / UH fun...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gains

Publication data

publ <- read_csv(here("data/input/OKM-seurantakohteet jufo-tasoluokat Osasto ja Alayksikkö.csv")) %>%
  rename(unit=`Tiedekunnan koodi ja nimi`,subunit=`Laitoksen koodi ja nimi`,subsubunit=...1,year=Julkaisuvuosi...4,jufo=`Jufoluokitus (Virta)`,jufo2=`Vahvistettu julkaisukanavaluokitus`,n=`Julkaisujen lukumäärä`) %>%
  select(-c(Julkaisuvuosi...10,`%`,`Viety okm`)) %>%
  group_by(across(-c(`Vahvistettu julkaisufoorumi id`,n))) %>%
  summarize(n=sum(n),.groups="drop")
New names:Rows: 49785 Columns: 11── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ...1, Laitoksen koodi ja nimi, Tiedekunnan koodi ja nimi, Viety okm
dbl (7): Julkaisuvuosi...4, Vahvistettu julkaisukanavaluokitus, Jufoluokitus (Virta), Vahvistettu julkaisufoorumi id, Julkaisujen lukumäärä, %, Julkaisuvuosi...10
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
publ_values <- tribble(
  ~jufo, ~value,
  0, 0.1,
  1, 1,
  2, 3,
  3, 4
)

publ_gains <- publ_values %>%
  mutate(gain=(gains %>% filter(type=="9 Julkaisupiste / Point, scientific publications") %>% pull(gain))*value)

publ

Teaching data

teaching <- read_tsv(here("data/processed/all-courses.tsv.gz")) %>%
  filter(ects!=0) %>%
  mutate(type=case_when(
    unittype=="programme" & str_detect(name, "Bachelor") ~ "Bachelor's",
    unittype=="programme" & str_detect(name, "Master|Magister|^Degree") ~ "Master's"
  )) %>% left_join(
    gains %>% 
      filter(type %in% c("Bachelor's","Master's")) %>%
      select(type,gain)
  ) %>%
  mutate(gain_per_ects=if_else(type=="Bachelor's degree",gain/180,gain/120)) %>%
  select(-gain)
Rows: 35566 Columns: 7── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (4): unitcode, name, unittype, parent_unitcode
dbl (3): year, ects, students
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Joining, by = "type"
teaching

Degree attainment data

degrees <- read_tsv(here("data/processed/degrees-attained.tsv.gz")) %>%
  mutate(type=case_when(
    str_detect(name, "Bachelor") ~ "2 Tutkintopiste, alempi korkeakoulututkinto / Degree point, Bachelor's degree",
    str_detect(name, "Master|Magister|^Degree Programme") ~ "1 Tutkintopiste, ylempi korkeakoulututkinto / Degree point, Master's degree"
  )) %>%
  left_join(gains %>% select(type,gain)) %>%
  mutate(gain=degrees*gain)
Rows: 510 Columns: 5── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (3): unitcode, name, parent_unitcode
dbl (2): year, degrees
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Joining, by = "type"
degrees

OKM euros from publications per euro used for TR staff salary

Background

Costs are estimated using the following table:

group_costs

Note that this only covers core teaching and research staff, and does not take into account differences in salaries between faculties. Of these, “Opetuksen ja tutkimuksen tukihenkilöstö / TR Support Staff” is probably particularly problematic, as its proportion is significant in multiple faculties:

pwy %>%
  filter(year<2023,unit!="H92 Tohtoriohjelmat") %>%
  group_by(year,unit,subgroup) %>%
  summarize(pwy=sum(pwy),.groups="drop") %>%
  ggplot(aes(x=year,y=pwy,color=subgroup)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks=seq(1000,3000,by=4)) +
  facet_wrap(~unit,scales="free_y",ncol=4) +
  theme_hsci_discrete(base_family="Arial") +
  theme(legend.position="bottom") +
  ylab("Person work years") +
  xlab("Year") +
  guides(color=guide_legend(nrow=6))

Thus, in the following analyses, the units where the proportion of teaching support staff in person work years is more than 10% of the total have been grayed out as problematic to compare.

To better approximate the fact that research contributions affect publications with a delay, the gains for publications in a year are weighted by a mean of the staff costs for the year as well as the previous year.

Analysis between faculties

publ %>% 
  filter(year>2017,year<=2022) %>%
  complete(year,subunit,unit,jufo,fill=list(n=0)) %>%
  left_join(publ_gains) %>%
  group_by(year,unit) %>%
  summarise(gain=sum(gain*n,na.rm=T)) %>%
  mutate(unit=fct_reorder(unit,gain)) %>%
  ggplot(aes(x=unit,y=gain)) +
  geom_boxplot() +
  theme_hsci(base_family="Arial") +
  theme(legend.position="bottom") +
  scale_y_continuous(labels=scales::comma_format()) +
  xlab("Faculty") +
  ylab("Euros gained per year") +
  guides(color="none") +
  coord_flip()
Joining, by = "jufo"`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

problematic_units <- 
  pwy %>% 
  group_by(unit,subgroup) %>% 
  summarize(pwy=sum(pwy)) %>% 
  group_by(unit) %>% 
  mutate(prop=pwy/sum(pwy)) %>%
  summarise(problematic=any(subgroup=="Opetuksen ja tutkimuksen tukihenkilöstö / TR Support Staff" & prop>=0.1), .groups="drop")
`summarise()` has grouped output by 'unit'. You can override using the `.groups` argument.
pwy %>%
  filter(year>2017,year<=2022,!(unit %in% c("H92 Tohtoriohjelmat","H01 Yliopistopalvelut","H00 Yliopiston johto ja yhteiset", "H99 Erilliset laitokset"))) %>%
  group_by(year,unit) %>%
  summarise(cost=sum(cost,na.rm=T),.groups="drop") %>%
  left_join(
    publ %>% 
      filter(year>2017,year<=2022) %>%
      complete(year,subunit,unit,jufo,fill=list(n=0)) %>%
      left_join(publ_gains) %>%
      group_by(year,unit) %>%
      summarise(gain=sum(gain*n,na.rm=T))
  ) %>%
  group_by(unit) %>%
  arrange(year) %>%
  mutate(publ_efficiency=gain/(lag(cost)+cost)*2) %>%
  ungroup() %>%
  left_join(problematic_units) %>%
  filter(!is.na(publ_efficiency)) %>%
  mutate(unit=fct_reorder(unit,if_else(problematic,publ_efficiency,100000000+publ_efficiency))) %>%
  ggplot(aes(x=unit,y=publ_efficiency,color=if_else(problematic,NA,unit=="H40 Humanistinen tiedekunta"))) +
  geom_boxplot() +
  scale_color_manual(values=c("black","red","gray")) +
  theme_hsci(base_family="Arial") +
  theme(legend.position="bottom") +
  xlab("Faculty") +
  ylab("Euros gained per euro spent") +
  guides(color="none") +
  coord_flip()
Joining, by = "jufo"`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.Joining, by = c("year", "unit")Joining, by = "unit"

Analysis within the Faculty of Arts

publ %>% 
  filter(unit=="H40 Humanistinen tiedekunta",year>2017,year<=2022) %>%
  complete(year,subunit,unit,jufo,fill=list(n=0)) %>%
  left_join(publ_gains) %>%
  group_by(year,subunit) %>%
  summarise(gain=sum(gain*n,na.rm=T)) %>%
  mutate(subunit=fct_reorder(subunit,gain)) %>%
  ggplot(aes(x=subunit,y=gain,color=subunit=="H402 Digitaalisten ihmistieteiden osasto")) +
  geom_boxplot() +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  scale_y_continuous(labels=scales::comma_format()) +
  theme(legend.position="bottom") +
  theme(legend.position="bottom") +
  xlab("Unit") +
  ylab("Euros gained per year") +
  guides(color="none") +
  coord_flip()
Joining, by = "jufo"`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

pwy %>%
  filter(unit=="H40 Humanistinen tiedekunta",subunit!="H400 Tiedekunnan yhteiset",year>2017,year<=2022) %>%
  group_by(year,subunit) %>%
  summarise(cost=sum(cost,na.rm=T),.groups="drop") %>%
  left_join(
    publ %>% 
      filter(year>2017,year<=2022) %>%
      complete(year,subunit,unit,jufo,fill=list(n=0)) %>%
      left_join(publ_gains) %>%
      group_by(year,subunit) %>%
      summarise(gain=sum(gain*n,na.rm=T))
  ) %>%
  group_by(subunit) %>%
  arrange(year) %>%
  mutate(publ_efficiency=gain/(lag(cost)+cost)*2) %>%
  ungroup() %>%
  filter(!is.na(publ_efficiency)) %>%
  mutate(subunit=fct_reorder(subunit,publ_efficiency)) %>%
  ggplot(aes(x=subunit,y=publ_efficiency,color=subunit=="H402 Digitaalisten ihmistieteiden osasto")) +
  geom_boxplot() +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  theme(legend.position="bottom") +
  theme(legend.position="bottom") +
  xlab("Unit") +
  ylab("Euros gained per euro spent") +
  guides(color="none") +
  coord_flip()
Joining, by = "jufo"`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.Joining, by = c("year", "subunit")

Teaching efficiency

Background

To evaluate teaching efficiency, the average number of ECTS credits gained per course is used. This is the most non-problematic estimate that I could think of for the amount of work put in, as person work year estimates are not tied to teaching programmes and can thus not be used.

Further, to compare the monetary efficiency between master’s and bachelor’s programmes in a single view, gains per programme are calculated by taking OKM euros gained by the University of Helsinki from bachelor’s and master’s degrees and then dividing those by the nominal sizes of the degrees, distributing the gains according to the ECTS outputs of each unit. This is intended to better represent overall teaching contribution toward the degrees so that also course credit going toward minors and degrees in other programmes would be appropriately counted.

Analysis between faculties

teaching %>%
  filter(unittype=="programme") %>%
  select(-name) %>%
  left_join(teaching %>% distinct(parent_unitcode=unitcode,name)) %>%
  group_by(year,name) %>%
  summarize(gain=sum(ects*gain_per_ects),.groups="drop") %>%
  filter(!is.na(gain)) %>%
  mutate(name=fct_reorder(name,gain)) %>%
  ggplot(aes(x=name,y=gain,color=name=="Faculty of Arts")) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip() +
  xlab("Faculty") +
  ylab("Total euros gained per year") +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  guides(color="none")
Joining, by = "parent_unitcode"

teaching %>% 
  filter(unittype=="course") %>%
  mutate(unitcode=parent_unitcode) %>%
  select(-parent_unitcode,-name,-gain_per_ects) %>%
  inner_join(teaching %>% distinct(unitcode,parent_unitcode,gain_per_ects)) %>% 
  inner_join(teaching %>% distinct(name,parent_unitcode=unitcode)) %>%
  filter(!is.na(gain_per_ects)) %>%
  mutate(name=fct_reorder(name,ects*gain_per_ects)) %>%
  ggplot(aes(x=name,y=ects*gain_per_ects,color=name=="Faculty of Arts")) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip(ylim=c(0,25000)) +
  xlab("Faculty") +
  ylab("Euros gained per course given") +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  guides(color="none")
Joining, by = "unitcode"Joining, by = "parent_unitcode"

Analysis within the Faculty of Arts

teaching %>%
  filter(parent_unitcode=="H40") %>%
  mutate(name=fct_reorder(name,ects*gain_per_ects)) %>%
  ggplot(aes(x=name,y=ects*gain_per_ects,color=name=="Master's Programme in Linguistic Diversity and Digital Humanities")) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip() +
  xlab("Programme") +
  ylab("Total euros gained per year") +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  guides(color="none")

teaching %>% 
  filter(unittype=="course") %>%
  mutate(unitcode=parent_unitcode) %>%
  select(-parent_unitcode,-name,-gain_per_ects) %>%
  inner_join(teaching %>% distinct(unitcode,parent_unitcode,name,gain_per_ects)) %>% 
  filter(parent_unitcode=="H40") %>%
  mutate(name=fct_reorder(name,ects*gain_per_ects)) %>%
  ggplot(aes(x=name,y=ects*gain_per_ects,color=name=="Master's Programme in Linguistic Diversity and Digital Humanities")) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip(ylim=c(0,25000)) +
  xlab("Faculty") +
  ylab("Euros gained per course given") +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  guides(color="none")
Joining, by = "unitcode"

Analysis within the Master’s Programme in Linguistic Diversity and Digital Humanities

teaching %>% 
  filter(str_detect(unitcode,"^LDA-")) %>%
  mutate(unitcode=str_sub(unitcode,end=5)) %>%
  mutate(unitcode=fct_recode(unitcode,"Common"="LDA-3","MA-Thesis"="LDA-8","Digital humanities"="LDA-H","Cognitive science"="LDA-C","Language technology"="LDA-T","Phonetics"="LDA-P","Linguistics"="LDA-D","Linguistics"="LDA-G","Linguistics"="LDA-L","Common"="LDA-E","Common"="LDA-M")) %>%
  mutate(gain=ects*(gains %>% filter(type=="Master's") %>% pull(gain))/120) %>%
  mutate(unitcode=fct_reorder(unitcode,gain)) %>%
  ggplot(aes(x=unitcode,y=gain)) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip() +
  xlab("Track") +
  ylab("Euros gained per course given") +
  theme_hsci_discrete(base_family="Arial")

---
title: "Data creation and analysis examples"
date: "`r Sys.Date()`"
output: 
  html_notebook:
    toc: yes
    code_folding: hide
  md_document:
    variant: gfm 
    toc: yes
---

# Setup and data loading

```{r setup}
source(here::here("src/common_basis.R"), local = knitr::knit_global())
```

## Person work year data

```{r}
group_costs <- tribble(
  ~subgroup,~cost,
  "4. porras / TR Staff Level 4",6500,
  "3. porras / TR Staff Level 3",5000,
  "2. porras / TR Staff Level 2",3500,
  "1. porras / TR Staff Level 1",2500,
  "Opetuksen ja tutkimuksen tukihenkilöstö / TR Support Staff",3000
)

pwy_in <- read_csv(here("data/input/LTS_henkilo_tehtavajaottelu_henkilostoryhma-htv.csv"),locale=locale(decimal_mark=','),col_types=cols(...35='c',...36='c',`Henkilöstöryhmä / Staff Group`='c',`Henkilöstöalaryhmä / Staff Subgroup`='c',`Vuosi / Year`='i',.default="d"))
pwy <- pwy_in %>%
  rename(year=`Vuosi / Year`,group=`Henkilöstöryhmä / Staff Group`,subgroup=`Henkilöstöalaryhmä / Staff Subgroup`,unit=...35,subunit=...36) %>%
  select(year,2:15,group,subgroup,unit,subunit) %>%
  pivot_longer(2:15,values_to="pwy") %>%
  filter(!is.na(pwy)) %>%
  select(-name) %>%
  distinct() %>%
  left_join(group_costs) %>%
#  replace_na(list(cost=2500)) %>%
  mutate(cost=pwy*cost)
pwy
```

## OKM gains data

```{r}
gains <- read_csv(here("data/input/Rahoitus vuodelle 2022 suoritteen laskennallinen tuotto.csv"), locale=locale(decimal_mark=',')) %>%
  rename(gain=`Suoritteen laskennallinen tuotto vuoden 2022 rahoituksessa / Value of an output in 2022 funding`,okm_weight=`Kriteerin painoarvo OKM-rahoitusmallissa / Share of the criteria in the MinEdu core funding modell`,type=Suorite,subtype=Nimi) %>%
  distinct(type,subtype,okm_weight,gain) %>%
  mutate(type=case_when(
    type == "2 Tutkintopiste, alempi korkeakoulututkinto / Degree point, Bachelor's degree" ~ "Bachelor's",
    type == "1 Tutkintopiste, ylempi korkeakoulututkinto / Degree point, Master's degree" ~ "Master's",
    T ~ type
  ))
gains
```


## Publication data

```{r}
publ <- read_csv(here("data/input/OKM-seurantakohteet jufo-tasoluokat Osasto ja Alayksikkö.csv")) %>%
  rename(unit=`Tiedekunnan koodi ja nimi`,subunit=`Laitoksen koodi ja nimi`,subsubunit=...1,year=Julkaisuvuosi...4,jufo=`Jufoluokitus (Virta)`,jufo2=`Vahvistettu julkaisukanavaluokitus`,n=`Julkaisujen lukumäärä`) %>%
  select(-c(Julkaisuvuosi...10,`%`,`Viety okm`)) %>%
  group_by(across(-c(`Vahvistettu julkaisufoorumi id`,n))) %>%
  summarize(n=sum(n),.groups="drop")

publ_values <- tribble(
  ~jufo, ~value,
  0, 0.1,
  1, 1,
  2, 3,
  3, 4
)

publ_gains <- publ_values %>%
  mutate(gain=(gains %>% filter(type=="9 Julkaisupiste / Point, scientific publications") %>% pull(gain))*value)

publ
```

## Teaching data

```{r}
teaching <- read_tsv(here("data/processed/all-courses.tsv.gz")) %>%
  filter(ects!=0) %>%
  mutate(type=case_when(
    unittype=="programme" & str_detect(name, "Bachelor") ~ "Bachelor's",
    unittype=="programme" & str_detect(name, "Master|Magister|^Degree") ~ "Master's"
  )) %>% left_join(
    gains %>% 
      filter(type %in% c("Bachelor's","Master's")) %>%
      select(type,gain)
  ) %>%
  mutate(gain_per_ects=if_else(type=="Bachelor's degree",gain/180,gain/120)) %>%
  select(-gain)
teaching
```

## Degree attainment data

```{r}
degrees <- read_tsv(here("data/processed/degrees-attained.tsv.gz")) %>%
  mutate(type=case_when(
    str_detect(name, "Bachelor") ~ "2 Tutkintopiste, alempi korkeakoulututkinto / Degree point, Bachelor's degree",
    str_detect(name, "Master|Magister|^Degree Programme") ~ "1 Tutkintopiste, ylempi korkeakoulututkinto / Degree point, Master's degree"
  )) %>%
  left_join(gains %>% select(type,gain)) %>%
  mutate(gain=degrees*gain)
degrees
```

# OKM euros from publications per euro used for TR staff salary

## Background

Costs are estimated using the following table:

```{r}
group_costs
```
 
Note that this only covers core teaching and research staff, and does not take into account differences in salaries between faculties. Of these, "Opetuksen ja tutkimuksen tukihenkilöstö / TR Support Staff" is probably particularly problematic, as its proportion is significant in multiple faculties:

```{r,fig.width=8,fig.height=11}
pwy %>%
  filter(year<2023,unit!="H92 Tohtoriohjelmat") %>%
  group_by(year,unit,subgroup) %>%
  summarize(pwy=sum(pwy),.groups="drop") %>%
  ggplot(aes(x=year,y=pwy,color=subgroup)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks=seq(1000,3000,by=4)) +
  facet_wrap(~unit,scales="free_y",ncol=4) +
  theme_hsci_discrete(base_family="Arial") +
  theme(legend.position="bottom") +
  ylab("Person work years") +
  xlab("Year") +
  guides(color=guide_legend(nrow=6))
```

Thus, in the following analyses, the **units where the proportion of teaching support staff in person work years is more than 10% of the total have been grayed out as problematic to compare**.

**To better approximate the fact that research contributions affect publications with a delay, the gains for publications in a year are weighted by a mean of the staff costs for the year as well as the previous year**.

## Analysis between faculties

```{r}
publ %>% 
  filter(year>2017,year<=2022) %>%
  complete(year,subunit,unit,jufo,fill=list(n=0)) %>%
  left_join(publ_gains) %>%
  group_by(year,unit) %>%
  summarise(gain=sum(gain*n,na.rm=T)) %>%
  mutate(unit=fct_reorder(unit,gain)) %>%
  ggplot(aes(x=unit,y=gain)) +
  geom_boxplot() +
  theme_hsci(base_family="Arial") +
  theme(legend.position="bottom") +
  scale_y_continuous(labels=scales::comma_format()) +
  xlab("Faculty") +
  ylab("Euros gained per year") +
  guides(color="none") +
  coord_flip()
```


```{r}
problematic_units <- 
  pwy %>% 
  group_by(unit,subgroup) %>% 
  summarize(pwy=sum(pwy)) %>% 
  group_by(unit) %>% 
  mutate(prop=pwy/sum(pwy)) %>%
  summarise(problematic=any(subgroup=="Opetuksen ja tutkimuksen tukihenkilöstö / TR Support Staff" & prop>=0.1), .groups="drop")

pwy %>%
  filter(year>2017,year<=2022,!(unit %in% c("H92 Tohtoriohjelmat","H01 Yliopistopalvelut","H00 Yliopiston johto ja yhteiset", "H99 Erilliset laitokset"))) %>%
  group_by(year,unit) %>%
  summarise(cost=sum(cost,na.rm=T),.groups="drop") %>%
  left_join(
    publ %>% 
      filter(year>2017,year<=2022) %>%
      complete(year,subunit,unit,jufo,fill=list(n=0)) %>%
      left_join(publ_gains) %>%
      group_by(year,unit) %>%
      summarise(gain=sum(gain*n,na.rm=T))
  ) %>%
  group_by(unit) %>%
  arrange(year) %>%
  mutate(publ_efficiency=gain/(lag(cost)+cost)*2) %>%
  ungroup() %>%
  left_join(problematic_units) %>%
  filter(!is.na(publ_efficiency)) %>%
  mutate(unit=fct_reorder(unit,if_else(problematic,publ_efficiency,100000000+publ_efficiency))) %>%
  ggplot(aes(x=unit,y=publ_efficiency,color=if_else(problematic,NA,unit=="H40 Humanistinen tiedekunta"))) +
  geom_boxplot() +
  scale_color_manual(values=c("black","red","gray")) +
  theme_hsci(base_family="Arial") +
  theme(legend.position="bottom") +
  xlab("Faculty") +
  ylab("Euros gained per euro spent") +
  guides(color="none") +
  coord_flip()
```

## Analysis within the Faculty of Arts

```{r}
publ %>% 
  filter(unit=="H40 Humanistinen tiedekunta",year>2017,year<=2022) %>%
  complete(year,subunit,unit,jufo,fill=list(n=0)) %>%
  left_join(publ_gains) %>%
  group_by(year,subunit) %>%
  summarise(gain=sum(gain*n,na.rm=T)) %>%
  mutate(subunit=fct_reorder(subunit,gain)) %>%
  ggplot(aes(x=subunit,y=gain,color=subunit=="H402 Digitaalisten ihmistieteiden osasto")) +
  geom_boxplot() +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  scale_y_continuous(labels=scales::comma_format()) +
  theme(legend.position="bottom") +
  theme(legend.position="bottom") +
  xlab("Unit") +
  ylab("Euros gained per year") +
  guides(color="none") +
  coord_flip()
```


```{r}
pwy %>%
  filter(unit=="H40 Humanistinen tiedekunta",subunit!="H400 Tiedekunnan yhteiset",year>2017,year<=2022) %>%
  group_by(year,subunit) %>%
  summarise(cost=sum(cost,na.rm=T),.groups="drop") %>%
  left_join(
    publ %>% 
      filter(year>2017,year<=2022) %>%
      complete(year,subunit,unit,jufo,fill=list(n=0)) %>%
      left_join(publ_gains) %>%
      group_by(year,subunit) %>%
      summarise(gain=sum(gain*n,na.rm=T))
  ) %>%
  group_by(subunit) %>%
  arrange(year) %>%
  mutate(publ_efficiency=gain/(lag(cost)+cost)*2) %>%
  ungroup() %>%
  filter(!is.na(publ_efficiency)) %>%
  mutate(subunit=fct_reorder(subunit,publ_efficiency)) %>%
  ggplot(aes(x=subunit,y=publ_efficiency,color=subunit=="H402 Digitaalisten ihmistieteiden osasto")) +
  geom_boxplot() +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  theme(legend.position="bottom") +
  theme(legend.position="bottom") +
  xlab("Unit") +
  ylab("Euros gained per euro spent") +
  guides(color="none") +
  coord_flip()
```

# Teaching efficiency

## Background

**To evaluate teaching efficiency, the average number of ECTS credits gained per course is used**. This is the most non-problematic estimate that I could think of for the amount of work put in, as person work year estimates are not tied to teaching programmes and can thus not be used.

Further, to compare the monetary efficiency between master's and bachelor's programmes in a single view, gains per programme are calculated by taking OKM euros gained by the University of Helsinki from bachelor's and master's degrees and then dividing those by the nominal sizes of the degrees, distributing the gains according to the ECTS outputs of each unit. This is intended to better represent overall teaching contribution toward the degrees so that also course credit going toward minors and degrees in other programmes would be appropriately counted.

## Analysis between faculties

```{r}
teaching %>%
  filter(unittype=="programme") %>%
  select(-name) %>%
  left_join(teaching %>% distinct(parent_unitcode=unitcode,name)) %>%
  group_by(year,name) %>%
  summarize(gain=sum(ects*gain_per_ects),.groups="drop") %>%
  filter(!is.na(gain)) %>%
  mutate(name=fct_reorder(name,gain)) %>%
  ggplot(aes(x=name,y=gain,color=name=="Faculty of Arts")) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip() +
  xlab("Faculty") +
  ylab("Total euros gained per year") +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  guides(color="none")
```

```{r}
teaching %>% 
  filter(unittype=="course") %>%
  mutate(unitcode=parent_unitcode) %>%
  select(-parent_unitcode,-name,-gain_per_ects) %>%
  inner_join(teaching %>% distinct(unitcode,parent_unitcode,gain_per_ects)) %>% 
  inner_join(teaching %>% distinct(name,parent_unitcode=unitcode)) %>%
  filter(!is.na(gain_per_ects)) %>%
  mutate(name=fct_reorder(name,ects*gain_per_ects)) %>%
  ggplot(aes(x=name,y=ects*gain_per_ects,color=name=="Faculty of Arts")) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip(ylim=c(0,25000)) +
  xlab("Faculty") +
  ylab("Euros gained per course given") +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  guides(color="none")
```

## Analysis within the Faculty of Arts

```{r}
teaching %>%
  filter(parent_unitcode=="H40") %>%
  mutate(name=fct_reorder(name,ects*gain_per_ects)) %>%
  ggplot(aes(x=name,y=ects*gain_per_ects,color=name=="Master's Programme in Linguistic Diversity and Digital Humanities")) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip() +
  xlab("Programme") +
  ylab("Total euros gained per year") +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  guides(color="none")
```

```{r}
teaching %>% 
  filter(unittype=="course") %>%
  mutate(unitcode=parent_unitcode) %>%
  select(-parent_unitcode,-name,-gain_per_ects) %>%
  inner_join(teaching %>% distinct(unitcode,parent_unitcode,name,gain_per_ects)) %>% 
  filter(parent_unitcode=="H40") %>%
  mutate(name=fct_reorder(name,ects*gain_per_ects)) %>%
  ggplot(aes(x=name,y=ects*gain_per_ects,color=name=="Master's Programme in Linguistic Diversity and Digital Humanities")) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip(ylim=c(0,25000)) +
  xlab("Faculty") +
  ylab("Euros gained per course given") +
  theme_hsci(base_family="Arial") +
  scale_color_manual(values=c("black","red")) +
  guides(color="none")
```

## Analysis within the Master's Programme in Linguistic Diversity and Digital Humanities

```{r}
teaching %>% 
  filter(str_detect(unitcode,"^LDA-")) %>%
  mutate(unitcode=str_sub(unitcode,end=5)) %>%
  mutate(unitcode=fct_recode(unitcode,"Common"="LDA-3","MA-Thesis"="LDA-8","Digital humanities"="LDA-H","Cognitive science"="LDA-C","Language technology"="LDA-T","Phonetics"="LDA-P","Linguistics"="LDA-D","Linguistics"="LDA-G","Linguistics"="LDA-L","Common"="LDA-E","Common"="LDA-M")) %>%
  group_by(year,unitcode) %>%
  summarise(gain=sum(ects*(gains %>% filter(type=="Master's") %>% pull(gain))/120),.groups="drop") %>%
  mutate(unitcode=fct_reorder(unitcode,gain)) %>%
  ggplot(aes(x=unitcode,y=gain)) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip() +
  xlab("Track") +
  ylab("Total Euros gained per year") +
  theme_hsci_discrete(base_family="Arial")
```

```{r}
teaching %>% 
  filter(str_detect(unitcode,"^LDA-")) %>%
  mutate(unitcode=str_sub(unitcode,end=5)) %>%
  mutate(unitcode=fct_recode(unitcode,"Common"="LDA-3","MA-Thesis"="LDA-8","Digital humanities"="LDA-H","Cognitive science"="LDA-C","Language technology"="LDA-T","Phonetics"="LDA-P","Linguistics"="LDA-D","Linguistics"="LDA-G","Linguistics"="LDA-L","Common"="LDA-E","Common"="LDA-M")) %>%
  mutate(gain=ects*(gains %>% filter(type=="Master's") %>% pull(gain))/120) %>%
  mutate(unitcode=fct_reorder(unitcode,gain)) %>%
  ggplot(aes(x=unitcode,y=gain)) +
  scale_y_continuous(labels=scales::number_format()) +
  geom_boxplot() + 
  coord_flip() +
  xlab("Track") +
  ylab("Euros gained per course given") +
  theme_hsci_discrete(base_family="Arial")
```
