library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(kableExtra)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(tidytext)
library(ggh4x)
## Warning: package 'ggh4x' was built under R version 4.2.1
Randomly sample 20 rows from the raw data set for review.
# Load the raw data and preview 20 randomly chosen rows as a styled HTML table.
data <- read.csv("data.csv")
data %>%
  slice_sample(n = 20) %>%
  kbl() %>%
  # "striped" is the bootstrap option name; the original "stripped" is not one
  # of the values kable_styling() documents.
  kable_styling(bootstrap_options = c("bordered", "hover", "striped"))
| student | Uni | New_Adjusted_Attrition_2019 | New_Normal_Attrition_2019 | Success_2020 | New_Adjusted_Retention_2019 | New_Normal_Retention_2019 |
|---|---|---|---|---|---|---|
| overall | The Australian National University | 93.49 | ||||
| overall | Torrens University Australia(d) | 51.8 | ||||
| domestic | The University of Sydney | 4.87 | ||||
| overall | University of Technology Sydney | 88 | ||||
| overall | Victoria University | 19.36 | ||||
| international | Queensland University of Technology | 87.9 | ||||
| international | Victoria University | 86.91 | ||||
| international | Edith Cowan University | 85.57 | ||||
| domestic | Federation University Australia(c) | 23.33 | ||||
| overall | Victoria University | 90.14 | ||||
| international | The University of Notre Dame Australia | 84.21 | ||||
| domestic | Queensland University of Technology | 88.62 | ||||
| international | Swinburne University of Technology | 89.81 | ||||
| overall | University of Wollongong | 83.48 | ||||
| international | The University of Melbourne | 6.15 | ||||
| domestic | University of Southern Queensland | 77.23 | ||||
| domestic | University of the Sunshine Coast | 80.42 | ||||
| overall | Griffith University | 78.05 | ||||
| overall | Australian Catholic University | 19.94 | ||||
| overall | The University of Queensland | 13.23 |
Some data cleaning:
# Reshape the rate columns (positions 3 to 7) to long format, then derive the
# descriptor columns `metrics`, `year` and `method` from the original column
# names.
mydata <- data %>%
  pivot_longer(cols = 3:7, names_to = "method", values_to = "values_per") %>%
  mutate(
    # Conditions are passed unnamed: case_when() expects bare two-sided
    # formulas, not `name = formula` pairs.
    metrics = case_when(
      str_detect(method, "Attrition") ~ "Attrition",
      str_detect(method, "Retention") ~ "Retention",
      TRUE ~ "Success"
    ),
    # `metrics` and `year` are derived first, while `method` still holds the
    # full original column name.
    year = str_extract(method, "[:digit:]+"),
    # Strip the metric/year suffix (e.g. "New_Normal_Attrition_2019" ->
    # "New_Normal", "Success_2020" -> "Success").
    method = str_remove(method, "_(Attrition|Retention)_2019|_2020"),
    values_per = as.double(values_per)
  ) %>%
  # Column order: year, metrics, method, Uni, student, values_per.
  relocate(year, metrics, method, Uni, student) %>%
  # Factors are created before incomplete rows are removed, so factor levels
  # whose rows are all dropped remain as empty levels.
  mutate(across(where(is.character), as.factor)) %>%
  na.omit()
Randomly review 20 rows of data from the cleaned data set.
# Preview 20 randomly chosen rows of the cleaned data as a styled HTML table.
mydata %>%
  slice_sample(n = 20) %>%
  kbl() %>%
  # "striped" is the bootstrap option name; the original "stripped" is not one
  # of the values kable_styling() documents.
  kable_styling(bootstrap_options = c("bordered", "hover", "striped"))
| year | metrics | method | Uni | student | values_per |
|---|---|---|---|---|---|
| 2019 | Attrition | New_Adjusted | The University of Melbourne | domestic | 3.43 |
| 2020 | Success | Success | Victoria University | overall | 90.14 |
| 2020 | Success | Success | Queensland University of Technology | domestic | 82.25 |
| 2020 | Success | Success | The University of Newcastle | domestic | 85.11 |
| 2020 | Success | Success | Macquarie University | overall | 84.66 |
| 2020 | Success | Success | The University of New England | international | 76.60 |
| 2020 | Success | Success | The University of Sydney | international | 85.52 |
| 2019 | Retention | New_Normal | CQUniversity | overall | 69.81 |
| 2019 | Retention | New_Adjusted | Edith Cowan University | domestic | 81.90 |
| 2019 | Attrition | New_Normal | The University of New England | international | 22.13 |
| 2019 | Attrition | New_Adjusted | Southern Cross University | domestic | 21.85 |
| 2019 | Retention | New_Normal | The University of Melbourne | international | 93.72 |
| 2020 | Success | Success | The University of Western Australia | overall | 87.51 |
| 2019 | Retention | New_Adjusted | Torrens University Australia(d) | domestic | 76.64 |
| 2019 | Attrition | New_Normal | Western Sydney University | international | 14.76 |
| 2020 | Success | Success | University of the Sunshine Coast | domestic | 80.42 |
| 2019 | Attrition | New_Adjusted | The University of Notre Dame Australia | domestic | 9.56 |
| 2019 | Retention | New_Normal | The University of Queensland | overall | 86.63 |
| 2020 | Success | Success | The University of Queensland | domestic | 85.39 |
| 2019 | Retention | New_Normal | Southern Cross University | overall | 67.28 |
Summary of the data:
# Column-wise summary; maxsum = 50 lets up to 50 factor levels be listed.
mydata %>%
  summary(maxsum = 50)
## year metrics method
## 2019:252 Attrition:126 New_Adjusted: 84
## 2020:126 Retention:126 New_Normal :168
## Success :126 Success :126
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
## Uni
## Australian Catholic University :9
## Australian Maritime College(e) :0
## Batchelor Institute of Indigenous Tertiary Education(f):0
## Bond University :9
## Charles Darwin University(f) :9
## Charles Sturt University :9
## CQUniversity :9
## Curtin University :9
## Deakin University :9
## Edith Cowan University :9
## Federation University Australia(c) :9
## Flinders University :9
## Griffith University :9
## James Cook University :9
## La Trobe University :9
## Macquarie University :9
## Monash University :9
## Murdoch University :9
## National Total :9
## Queensland University of Technology :9
## RMIT University :9
## Southern Cross University :9
## Swinburne University of Technology :9
## The Australian National University :9
## The University of Adelaide :9
## The University of Melbourne :9
## The University of New England :9
## The University of Newcastle :9
## The University of Notre Dame Australia :9
## The University of Queensland :9
## The University of Sydney :9
## The University of Western Australia :9
## Torrens University Australia(d) :9
## University of Canberra :9
## University of Divinity :9
## University of New South Wales(b) :9
## University of South Australia :9
## University of Southern Queensland :9
## University of Tasmania(e) :9
## University of Technology Sydney :9
## University of the Sunshine Coast :9
## University of Wollongong :9
## Victoria University :9
## Western Sydney University :9
## student values_per
## domestic :126 Min. : 3.43
## international:126 1st Qu.:20.58
## overall :126 Median :81.38
## Mean :61.94
## 3rd Qu.:88.04
## Max. :97.64
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
Each column in the cleaned data now has the desired data type.
# Compact overview of the column types and the first few values.
mydata %>%
  glimpse()
## Rows: 378
## Columns: 6
## $ year <fct> 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019, 2019,…
## $ metrics <fct> Attrition, Attrition, Attrition, Attrition, Attrition, Attr…
## $ method <fct> New_Adjusted, New_Adjusted, New_Adjusted, New_Adjusted, New…
## $ Uni <fct> National Total, Charles Sturt University, Macquarie Univers…
## $ student <fct> domestic, domestic, domestic, domestic, domestic, domestic,…
## $ values_per <dbl> 13.23, 21.01, 8.77, 21.85, 24.90, 12.92, 4.87, 3.86, 4.99, …
There are no missing values (NA) in the cleaned data set.
# Number of NA entries in each column.
mydata %>%
  is.na() %>%
  colSums()
## year metrics method Uni student values_per
## 0 0 0 0 0 0
# Number of observations for every year / metric / method / student combination.
# count() returns an ungrouped result, so no `.groups` message is emitted.
mydata %>%
  count(year, metrics, method, student, name = "count") %>%
  kbl(align = "c") %>%
  kable_paper() %>%
  column_spec(1, bold = TRUE) %>%
  # Merge repeated values in the first four columns into single cells.
  collapse_rows(columns = 1:4, valign = "top")
## `summarise()` has grouped output by 'year', 'metrics', 'method'. You can
## override using the `.groups` argument.
| year | metrics | method | student | count |
|---|---|---|---|---|
| 2019 | Attrition | New_Adjusted | domestic | 42 |
| 2019 | Attrition | New_Normal | international | 42 |
| 2019 | Attrition | New_Normal | overall | 42 |
| 2019 | Retention | New_Adjusted | domestic | 42 |
| 2019 | Retention | New_Normal | international | 42 |
| 2019 | Retention | New_Normal | overall | 42 |
| 2020 | Success | Success | domestic | 42 |
| 2020 | Success | Success | international | 42 |
| 2020 | Success | Success | overall | 42 |
# Small demonstration table for collapse_rows(): C1 and C2 contain runs of
# repeated values that are collapsed in the rendered table. C4 is drawn at
# random with no seed set, so its values differ between runs.
collapse_rows_dt <- data.frame(
  C1 = c(rep("a", 10), rep("b", 5)),
  C2 = c(rep("c", 7), rep("d", 3), rep("c", 2), rep("d", 3)),
  C3 = 1:15,
  C4 = sample(c(0, 1), 15, replace = TRUE)
)
kbl(collapse_rows_dt, align = "c") %>%
  kable_paper(full_width = FALSE) %>%
  column_spec(1, bold = TRUE) %>%
  collapse_rows(columns = 1:2, valign = "top")
| C1 | C2 | C3 | C4 |
|---|---|---|---|
| a | c | 1 | 0 |
| a | c | 2 | 0 |
| a | c | 3 | 0 |
| a | c | 4 | 1 |
| a | c | 5 | 0 |
| a | c | 6 | 1 |
| a | c | 7 | 0 |
| a | d | 8 | 1 |
| a | d | 9 | 0 |
| a | d | 10 | 1 |
| b | c | 11 | 1 |
| b | c | 12 | 1 |
| b | d | 13 | 0 |
| b | d | 14 | 1 |
| b | d | 15 | 0 |
# Attrition rows only. `label` is the university name ordered by value within
# each student group, for use as a plot axis.
df1 <- mydata %>%
  filter(metrics == "Attrition") %>%
  mutate(
    label = reorder_within(x = Uni, by = values_per, within = student)
  ) %>%
  dplyr::select(-metrics)
For the attrition rates (New Adjusted and New Normal methods):
# Show the first rows of the attrition subset.
print(df1)
## # A tibble: 126 × 6
## year method Uni student values_per label
## <fct> <fct> <fct> <fct> <dbl> <fct>
## 1 2019 New_Adjusted National Total domestic 13.2 Nati…
## 2 2019 New_Adjusted Charles Sturt University domestic 21.0 Char…
## 3 2019 New_Adjusted Macquarie University domestic 8.77 Macq…
## 4 2019 New_Adjusted Southern Cross University domestic 21.8 Sout…
## 5 2019 New_Adjusted The University of New England domestic 24.9 The …
## 6 2019 New_Adjusted The University of Newcastle domestic 12.9 The …
## 7 2019 New_Adjusted The University of Sydney domestic 4.87 The …
## 8 2019 New_Adjusted University of New South Wales(b) domestic 3.86 Univ…
## 9 2019 New_Adjusted University of Technology Sydney domestic 4.99 Univ…
## 10 2019 New_Adjusted University of Wollongong domestic 11.3 Univ…
## # … with 116 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Horizontal bar chart of attrition values per university, faceted by year
# (rows) and by method and student group (nested columns).
ggplot(data = df1, mapping = aes(x = values_per, y = label)) +
  geom_col(width = 0.7) +
  facet_nested(year ~ method + student, scales = "free") +
  # Pairs with reorder_within() so the axis shows the plain university names.
  scale_y_reordered() +
  theme_bw()