library(httr)
wiki <- GET("https://en.wikipedia.org/wiki/Times_Higher_Education_World_University_Rankings")
wiki
## Response [https://en.wikipedia.org/wiki/Times_Higher_Education_World_University_Rankings]
## Date: 2020-12-12 10:20
## Status: 200
## Content-Type: text/html; charset=UTF-8
## Size: 274 kB
## <!DOCTYPE html>
## <html class="client-nojs" lang="en" dir="ltr">
## <head>
## <meta charset="UTF-8"/>
## <title>Times Higher Education World University Rankings - Wikipedia</title>
## <script>document.documentElement.className="client-js";RLCONF={"wgBreakF...
## "Articles with short description","Short description matches Wikidata","...
## "wgULSCurrentAutonym":"English","wgNoticeProject":"wikipedia","wgCentral...
## "ext.gadget.switcher","ext.centralauth.centralautologin","mmv.head","mmv...
## <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.implement("user.o...
## ...
library(XML)
doc1 <- readHTMLTable(
doc = content(wiki, "text"))
#str(doc1)
top50 <- doc1[[3]] # save the third list - Times Higher Education World University Rankings—Top 50
top25 <- doc1[[4]] # save the third list - Times Higher Education World Reputation Rankings—Top 25
library(kableExtra)
top50[1:20, ] %>%
kable() %>%
kable_styling()
| V1 | V2 | V3 | V4 | V5 | V6 | V7 | V8 | V9 | V10 | V11 | V12 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Institution | 2010–11[42] | 2011–12[43] | 2012–13[44] | 2013–14[45] | 2014–15[46] | 2015–16[47] | 2016–17[48] | 2017-18[49] | 2018–19[50] | 2019–20[51] | 2020–21[52] |
| University of Oxford | 6 | 4 | 2 | 2 | 3 | 2 | 1 | 1 | 1 | 1 | 1 |
| Stanford University | 4 | 2 | 3 | 4 | 4 | 3 | 3 | 3 | 3 | 4 | 2 |
| Harvard University | 1 | 2 | 4 | 2 | 2 | 6 | 6 | 6 | 6 | 7 | 3 |
| California Institute of Technology | 2 | 1 | 1 | 1 | 1 | 1 | 2 | 3 | 5 | 2 | 4 |
| Massachusetts Institute of Technology | 3 | 7 | 5 | 5 | 6 | 5 | 5 | 5 | 4 | 5 | 5 |
| University of Cambridge | 6 | 6 | 7 | 7 | 5 | 4 | 4 | 2 | 2 | 3 | 6 |
| University of California, Berkeley | 8 | 10 | 9 | 8 | 8 | 13 | 10 | 18 | 15 | 13 | 7 |
| Yale University | 10 | 11 | 11 | 11 | 9 | 12 | 12 | 12 | 8 | 8 | 8 |
| Princeton University | 5 | 5 | 6 | 6 | 7 | 7 | 7 | 7 | 7 | 6 | 9 |
| University of Chicago | 13 | 9 | 10 | 9 | 11 | 10 | 10 | 9 | 10 | 9 | 10 |
| Imperial College London | 9 | 8 | 8 | 10 | 9 | 8 | 8 | 8 | 9 | 10 | 11 |
| Johns Hopkins University | 13 | 14 | 16 | 15 | 15 | 11 | 17 | 13 | 12 | 12 | 12 |
| University of Pennsylvania | 1 | 16 | 15 | 16 | 16 | 17 | 13 | 10 | 12 | 11 | 13 |
| Swiss Federal Institute of Technology in Zurich | 15 | 15 | 12 | 14 | 13 | 9 | 9 | 10 | 11 | 13 | 14 |
| University of California, Los Angeles | 11 | 13 | 13 | 12 | 12 | 16 | 14 | 15 | 17 | 17 | 15 |
| University College London | 22 | 17 | 17 | 21 | 22 | 14 | 15 | 16 | 14 | 15 | 16 |
| Columbia University | 18 | 12 | 14 | 13 | 14 | 15 | 16 | 14 | 16 | 16 | 17 |
| University of Toronto | 17 | 19 | 21 | 20 | 20 | 19 | 22 | 22 | 21 | 18 | 18 |
| Cornell University | 14 | 20 | 18 | 19 | 19 | 18 | 19 | 19 | 19 | 19 | 19 |
tail(top50)
#library(datapasta)
#vector_paste()
top50$V2[top50$V2 == "-"] <- NA
top50$V3[top50$V3 == "-"] <- NA
top50$V4[top50$V4 == "-"] <- NA
top50$V5[top50$V5 == "-"] <- NA
top50$V6[top50$V6 == "-"] <- NA
top50$V7[top50$V7 == "-"] <- NA
top50$V8[top50$V8 == "-"] <- NA
names(top50) <- c("Institution", "y2010", "y2011", "y2012", "y2013", "y2014", "y2015", "y2016", "y2017", "y2018", "y2019", "y2020")
top50 <- top50[-1, ]
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:kableExtra':
##
## group_rows
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
top50 <- top50 %>%
mutate_if(sapply(top50, is.factor), as.character)
head(top50)
head(top50)
tail(top50)
str(top50)
## 'data.frame': 50 obs. of 12 variables:
## $ Institution: chr "University of Oxford" "Stanford University" "Harvard University" "California Institute of Technology" ...
## $ y2010 : chr "6" "4" "1" "2" ...
## $ y2011 : chr "4" "2" "2" "1" ...
## $ y2012 : chr "2" "3" "4" "1" ...
## $ y2013 : chr "2" "4" "2" "1" ...
## $ y2014 : chr "3" "4" "2" "1" ...
## $ y2015 : chr "2" "3" "6" "1" ...
## $ y2016 : chr "1" "3" "6" "2" ...
## $ y2017 : chr "1" "3" "6" "3" ...
## $ y2018 : chr "1" "3" "6" "5" ...
## $ y2019 : chr "1" "4" "7" "2" ...
## $ y2020 : chr "1" "2" "3" "4" ...
top50 %>%
kable() %>%
kable_styling()
| Institution | y2010 | y2011 | y2012 | y2013 | y2014 | y2015 | y2016 | y2017 | y2018 | y2019 | y2020 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| University of Oxford | 6 | 4 | 2 | 2 | 3 | 2 | 1 | 1 | 1 | 1 | 1 |
| Stanford University | 4 | 2 | 3 | 4 | 4 | 3 | 3 | 3 | 3 | 4 | 2 |
| Harvard University | 1 | 2 | 4 | 2 | 2 | 6 | 6 | 6 | 6 | 7 | 3 |
| California Institute of Technology | 2 | 1 | 1 | 1 | 1 | 1 | 2 | 3 | 5 | 2 | 4 |
| Massachusetts Institute of Technology | 3 | 7 | 5 | 5 | 6 | 5 | 5 | 5 | 4 | 5 | 5 |
| University of Cambridge | 6 | 6 | 7 | 7 | 5 | 4 | 4 | 2 | 2 | 3 | 6 |
| University of California, Berkeley | 8 | 10 | 9 | 8 | 8 | 13 | 10 | 18 | 15 | 13 | 7 |
| Yale University | 10 | 11 | 11 | 11 | 9 | 12 | 12 | 12 | 8 | 8 | 8 |
| Princeton University | 5 | 5 | 6 | 6 | 7 | 7 | 7 | 7 | 7 | 6 | 9 |
| University of Chicago | 13 | 9 | 10 | 9 | 11 | 10 | 10 | 9 | 10 | 9 | 10 |
| Imperial College London | 9 | 8 | 8 | 10 | 9 | 8 | 8 | 8 | 9 | 10 | 11 |
| Johns Hopkins University | 13 | 14 | 16 | 15 | 15 | 11 | 17 | 13 | 12 | 12 | 12 |
| University of Pennsylvania | 1 | 16 | 15 | 16 | 16 | 17 | 13 | 10 | 12 | 11 | 13 |
| Swiss Federal Institute of Technology in Zurich | 15 | 15 | 12 | 14 | 13 | 9 | 9 | 10 | 11 | 13 | 14 |
| University of California, Los Angeles | 11 | 13 | 13 | 12 | 12 | 16 | 14 | 15 | 17 | 17 | 15 |
| University College London | 22 | 17 | 17 | 21 | 22 | 14 | 15 | 16 | 14 | 15 | 16 |
| Columbia University | 18 | 12 | 14 | 13 | 14 | 15 | 16 | 14 | 16 | 16 | 17 |
| University of Toronto | 17 | 19 | 21 | 20 | 20 | 19 | 22 | 22 | 21 | 18 | 18 |
| Cornell University | 14 | 20 | 18 | 19 | 19 | 18 | 19 | 19 | 19 | 19 | 19 |
| Duke University | 24 | 22 | 23 | 17 | 18 | 20 | 18 | 17 | 18 | 20 | 20 |
| Tsinghua University | 58 | 71 | 52 | 50 | 49 | 47 | 35 | 30 | 22 | 23 | 20 |
| University of Michigan | 15 | 18 | 20 | 18 | 17 | 21 | 21 | 21 | 20 | 21 | 22 |
| Peking University | 37 | 49 | 46 | 45 | 48 | 42 | 29 | 27 | 31 | 24 | 23 |
| Northwestern University | 25 | 26 | 19 | 22 | 21 | 25 | 20 | 20 | 25 | 22 | 24 |
| National University of Singapore | 34 | 40 | 29 | 26 | 25 | 26 | 24 | 22 | 23 | 25 | 25 |
| New York University | 60 | 44 | 41 | 40 | 38 | 30 | 32 | 27 | 27 | 29 | 26 |
| London School of Economics and Political Science | 86 | 47 | 39 | 32 | 34 | 23 | 25 | 25 | 26 | 27 | 27 |
| Carnegie Mellon University | 20 | 21 | 22 | 24 | 24 | 22 | 23 | 24 | 24 | 27 | 28 |
| University of Washington | 23 | 25 | 24 | 25 | 26 | 32 | 25 | 25 | 28 | 26 | 29 |
| University of Edinburgh | 40 | 36 | 32 | 39 | 36 | 24 | 27 | 27 | 29 | 30 | 30 |
| University of Melbourne | 36 | 37 | 28 | 34 | 33 | 33 | 33 | 32 | 32 | 32 | 31 |
| Ludwig Maximilian University of Munich | 61 | 45 | 48 | 55 | 29 | 29 | 30 | 34 | 32 | 32 | 32 |
| University of California, San Diego | 32 | 33 | 38 | 40 | 41 | 39 | 41 | 31 | 30 | 31 | 33 |
| University of British Columbia | 30 | 22 | 30 | 31 | 32 | 34 | 36 | 34 | 37 | 34 | 34 |
| King’s College London | 77 | 56 | 57 | 38 | 40 | 27 | 36 | 36 | 38 | 36 | 35 |
| Karolinska Institute | 43 | 32 | 42 | 36 | 44 | 28 | 28 | 38 | 40 | 41 | 36 |
| University of Tokyo | 26 | 30 | 27 | 23 | 23 | 43 | 39 | 46 | 42 | 36 | 36 |
| Georgia Institute of Technology | 27 | 24 | 25 | 28 | 27 | 41 | 33 | 33 | 34 | 38 | 38 |
| University of Hong Kong | 21 | 34 | 35 | 43 | 43 | 44 | 43 | 40 | 36 | 35 | 39 |
| McGill University | 35 | 28 | 34 | 35 | 39 | 38 | 42 | 42 | 44 | 42 | 40 |
| Technical University of Munich | 101 | 88 | 105 | 87 | 98 | 53 | 46 | 41 | 44 | 43 | 41 |
| Heidelberg University | 83 | 73 | 78 | 68 | 70 | 37 | 43 | 45 | 47 | 44 | 42 |
| École Polytechnique Fédérale de Lausanne | 48 | 46 | 40 | 37 | 34 | 31 | 30 | 38 | 35 | 38 | 43 |
| University of Texas at Austin | NA | 29 | 25 | 27 | 28 | 46 | 50 | 49 | 39 | 38 | 44 |
| Katholieke Universiteit Leuven | 119 | 67 | 58 | 61 | 55 | 35 | 40 | 47 | 48 | 45 | 45 |
| Université Paris Sciences et Lettres | NA | NA | NA | NA | NA | NA | NA | 72 | 41 | 45 | 46 |
| Nanyang Technological University | 174 | 169 | 86 | 76 | 61 | 55 | 54 | 52 | 51 | 48 | 47 |
| University of Illinois at Urbana–Champaign | 33 | 31 | 33 | 29 | 29 | 36 | 36 | 37 | 50 | 48 | 48 |
| University of Wisconsin-Madison | 43 | 27 | 31 | 30 | 29 | 50 | 45 | 43 | 43 | 51 | 49 |
| Washington University in St. Louis | 38 | 41 | 44 | 42 | 42 | 60 | 57 | 50 | 54 | 52 | 50 |
#library(labelled)
#variable.names(top50)
#var.labels <- c(Institution = "",
# y2010 = "2010-2011",
# y2011 = "2011-2012",
# y2012 = "2012-2013",
# y2013 = "2013-2014",
# y2014 = "2014-2015",
# y2015 = "2015-2016",
# y2016 = "2016-2017",
# y2017 = "2017-2018",
# y2018 = "2018-2019",
# y2019 = "2019-2020",
# y2020 = "2020-2021")
#top50 <- labelled::set_variable_labels(top50, .labels = var.labels)
#str(top50)
#head(top50)
write.csv(top50, file = "top50.csv")
df <- read.csv("top50.csv", row.names = NULL)
head(df, 20)
str(df)
## 'data.frame': 50 obs. of 13 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Institution: Factor w/ 50 levels "California Institute of Technology",..: 42 25 8 1 17 35 32 50 24 36 ...
## $ y2010 : int 6 4 1 2 3 6 8 10 5 13 ...
## $ y2011 : int 4 2 2 1 7 6 10 11 5 9 ...
## $ y2012 : int 2 3 4 1 5 7 9 11 6 10 ...
## $ y2013 : int 2 4 2 1 5 7 8 11 6 9 ...
## $ y2014 : int 3 4 2 1 6 5 8 9 7 11 ...
## $ y2015 : int 2 3 6 1 5 4 13 12 7 10 ...
## $ y2016 : int 1 3 6 2 5 4 10 12 7 10 ...
## $ y2017 : int 1 3 6 3 5 2 18 12 7 9 ...
## $ y2018 : int 1 3 6 5 4 2 15 8 7 10 ...
## $ y2019 : int 1 4 7 2 5 3 13 8 6 9 ...
## $ y2020 : int 1 2 3 4 5 6 7 8 9 10 ...
#top50 <- df %>% select("Institution", "y2010", "y2011", "y2012", "y2013", "y2014", "y2015", "y2016", "y2017", #"y2018", "y2019", "y2020")
#top50 <- top50 %>%
# mutate_if(sapply(top50, is.factor), as.character)
#head(top50, 20)
#str(top50)
#write.csv(top50, file = "top50.csv")
#df <- read.csv("top50.csv", row.names = NULL)
#str(df)
top50_1 <- top50 %>% select("Institution", "y2010", "y2020")
str(top50_1)
## 'data.frame': 50 obs. of 3 variables:
## $ Institution: chr "University of Oxford" "Stanford University" "Harvard University" "California Institute of Technology" ...
## $ y2010 : chr "6" "4" "1" "2" ...
## $ y2020 : chr "1" "2" "3" "4" ...
head(top50_1)
top50_10 <- top50_1[1:10,]
top50new <- broom::fix_data_frame(
t(top50_10),
newcol = "Year")
## Warning: This function is deprecated as of broom 0.7.0 and will be removed
## from a future release. Please see tibble::as_tibble().
top50new
colnames(top50new) <- top50new[1, ]
top50new <- top50new[-1, ]
top50new
top50new$Year <- top50new$Institution
top50new <- top50new[, -1]
str(top50new)
## tibble [2 x 11] (S3: tbl_df/tbl/data.frame)
## $ University of Oxford : chr [1:2] "6" "1"
## $ Stanford University : chr [1:2] "4" "2"
## $ Harvard University : chr [1:2] "1" "3"
## $ California Institute of Technology : chr [1:2] "2" "4"
## $ Massachusetts Institute of Technology: chr [1:2] "3" "5"
## $ University of Cambridge : chr [1:2] "6" "6"
## $ University of California, Berkeley : chr [1:2] "8" "7"
## $ Yale University : chr [1:2] "10" "8"
## $ Princeton University : chr [1:2] "5" "9"
## $ University of Chicago : chr [1:2] "13" "10"
## $ Year : chr [1:2] "y2010" "y2020"
newtop50 <- reshape2::melt(top50new, id="Year", variable.name="Institution", value.name = "Rate")
newtop50
newtop50$Year <- factor(newtop50$Year,
levels = c("y2010", "y2020"),
labels = c("2010-11","2020-21"),
ordered = TRUE)
head(newtop50)
library(ggplot2)
require(ggrepel)
## Loading required package: ggrepel
ggplot(data = newtop50, aes(x = Year, y = Rate, group = Institution)) +
geom_line(aes(color = Institution, alpha = 1), size = 1)+
# geom_text_repel(data = newtop50 %>% filter(Year == "2010-11"),
# aes(label = Institution) ,
# hjust = "left",
# fontface = "bold",
# size = 3,
# nudge_x = -.45,
# direction = "y") +
# geom_text_repel(data = newtop50 %>% filter(Year == "2020-21"),
# aes(label = Institution) ,
# hjust = "right",
# fontface = "bold",
# size = 3,
# nudge_x = .5,
# direction = "y") +
geom_label(aes(label = Rate),
size = 2.5,
label.padding = unit(0.05, "lines"),
label.size = 0.0) +
labs(
title = "The dinamics of top-10 Times Higher Education World University Rankings",
subtitle = "Based on: the most reliable source",
caption = "en.wikipedia.org/wiki/Times_Higher_Education_World_University_Rankings"
) +
theme_bw()
As the deadline is already passed (i was sure it was till 00.00, but i’ve noticed the real timing when it was too late, oh), I’ll write not much but still my small insight and comments on the works I’ve done.
First, sorry for another delay. I felt really bad the last couple of days and it was difficult for me to handle this work on time. Anyway, better late than never, I’ve kinda handled it (and now i feel at least a little bit happy).
I’ve decided to show you the table with the whole top-50 (just in case) and as for the graph I’ve placed 10 universities there.
From the graph we see that :
In general top10 have not really changed. Institutions, which are on top for today had nearly the same ratings before. Only University of Chicago was not presented in top-10 for 2010-11 years, it was on the 13th place in that rating. Now it is the 10th one.
The first two leading universities (Oxford and Stanford) had a really nice jump from 4th and 6th places to the 2nd and the 1st ones, moving the three leaders to later places (3 to 5).
Only one university shows some strong decrease in its rating - Princeton lost its 5th place moving to the 9th one.
For some interest let’s also have a look on extended a bit version of the same statistics: I’ve added some middle years - 2013 and 2016.
top50_1 <- top50 %>% select("Institution", "y2010", "y2013", "y2016", "y2020")
#str(top50_1)
#head(top50_1)
top50_10 <- top50_1[1:10,]
top50new <- broom::fix_data_frame(
t(top50_10),
newcol = "Year")
## Warning: This function is deprecated as of broom 0.7.0 and will be removed
## from a future release. Please see tibble::as_tibble().
#top50new
colnames(top50new) <- top50new[1, ]
top50new <- top50new[-1, ]
#top50new
top50new$Year <- top50new$Institution
top50new <- top50new[, -1]
#str(top50new)
newtop50 <- reshape2::melt(top50new, id="Year", variable.name="Institution", value.name = "Rate")
#newtop50
newtop50$Year <- factor(newtop50$Year,
levels = c("y2010", "y2013", "y2016", "y2020"),
labels = c("2010-11", "2013-14", "2016-17", "2020-21"),
ordered = TRUE)
#head(newtop50)
ggplot(data = newtop50, aes(x = Year, y = Rate, group = Institution)) +
geom_line(aes(color = Institution, alpha = 1), size = 1)+
geom_label(aes(label = Rate),
size = 2.5,
label.padding = unit(0.05, "lines"),
label.size = 0.0) +
labs(
title = "The dinamics of top-10 Times Higher Education World University Rankings\n(4 timepoints)",
subtitle = "Based on: the most reliable source",
caption = "en.wikipedia.org/wiki/Times_Higher_Education_World_University_Rankings"
) +
theme_bw()
Here we see a better look on the rapid jump of Oxford University, smoother slopes of Stanford, Princeton and two universities of Technology (both Californian and the one from Massachusetts), and also some highly fluctuated institutions like Harvard, Cambridge, Berkeley and Yale, which has dropped out of top-10 in the middle of the decade, but rapidly came back in its last half.