library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(CGPfunctions)
wikidata <- "https://en.wikipedia.org/wiki/Times_Higher_Education_World_University_Rankings"
data1 <- wikidata%>%
read_html()%>%
html_table(fill=TRUE)
names(rmarkdown::paged_table(data1[[3]]))
## [1] "Institution" "2023[46]" "2022[47]" "2021[48]" "2020[49]"
## [6] "2019[50]" "2018[51]" "2017[52]" "2016[53]" "2015[54]"
## [11] "2014[55]" "2013[56]" "2012[57]"
rmarkdown::paged_table(data1[[3]])
rating
rating<-data1[[3]]
rating
## # A tibble: 10 × 13
## Institution `2023[46]` `2022[47]` `2021[48]` `2020[49]` `2019[50]` `2018[51]`
## <chr> <int> <int> <int> <int> <int> <int>
## 1 University… 1 1 1 1 1 1
## 2 Harvard Un… 2 2 3 7 6 6
## 3 University… 3 5 6 3 2 2
## 4 Stanford U… 3 4 2 4 3 3
## 5 Massachuse… 5 5 5 5 4 5
## 6 California… 6 2 4 2 5 3
## 7 Princeton … 7 7 9 6 7 7
## 8 University… 8 8 7 13 15 18
## 9 Yale Unive… 9 9 8 8 8 12
## 10 Imperial C… 10 12 11 10 9 8
## # ℹ 6 more variables: `2017[52]` <int>, `2016[53]` <int>, `2015[54]` <int>,
## # `2014[55]` <int>, `2013[56]` <int>, `2012[57]` <int>
summary(rating)
## Institution 2023[46] 2022[47] 2021[48]
## Length:10 Min. : 1.00 Min. : 1.00 Min. : 1.00
## Class :character 1st Qu.: 3.00 1st Qu.: 2.50 1st Qu.: 3.25
## Mode :character Median : 5.50 Median : 5.00 Median : 5.50
## Mean : 5.40 Mean : 5.50 Mean : 5.60
## 3rd Qu.: 7.75 3rd Qu.: 7.75 3rd Qu.: 7.75
## Max. :10.00 Max. :12.00 Max. :11.00
## 2020[49] 2019[50] 2018[51] 2017[52]
## Min. : 1.00 Min. : 1.00 Min. : 1.00 Min. : 1.00
## 1st Qu.: 3.25 1st Qu.: 3.25 1st Qu.: 3.00 1st Qu.: 3.25
## Median : 5.50 Median : 5.50 Median : 5.50 Median : 5.50
## Mean : 5.90 Mean : 6.00 Mean : 6.50 Mean : 5.80
## 3rd Qu.: 7.75 3rd Qu.: 7.75 3rd Qu.: 7.75 3rd Qu.: 7.75
## Max. :13.00 Max. :15.00 Max. :18.00 Max. :12.00
## 2016[53] 2015[54] 2014[55] 2013[56] 2012[57]
## Min. : 1.00 Min. :1.00 Min. : 1.00 Min. : 1.00 Min. : 1.00
## 1st Qu.: 3.25 1st Qu.:3.25 1st Qu.: 2.50 1st Qu.: 3.25 1st Qu.: 2.50
## Median : 5.50 Median :5.50 Median : 5.50 Median : 5.50 Median : 5.50
## Mean : 6.10 Mean :5.40 Mean : 5.60 Mean : 5.60 Mean : 5.60
## 3rd Qu.: 7.75 3rd Qu.:7.75 3rd Qu.: 7.75 3rd Qu.: 7.75 3rd Qu.: 7.75
## Max. :13.00 Max. :9.00 Max. :11.00 Max. :11.00 Max. :11.00
str(rating)
## tibble [10 × 13] (S3: tbl_df/tbl/data.frame)
## $ Institution: chr [1:10] "University of Oxford" "Harvard University" "University of Cambridge" "Stanford University" ...
## $ 2023[46] : int [1:10] 1 2 3 3 5 6 7 8 9 10
## $ 2022[47] : int [1:10] 1 2 5 4 5 2 7 8 9 12
## $ 2021[48] : int [1:10] 1 3 6 2 5 4 9 7 8 11
## $ 2020[49] : int [1:10] 1 7 3 4 5 2 6 13 8 10
## $ 2019[50] : int [1:10] 1 6 2 3 4 5 7 15 8 9
## $ 2018[51] : int [1:10] 1 6 2 3 5 3 7 18 12 8
## $ 2017[52] : int [1:10] 1 6 4 3 5 2 7 10 12 8
## $ 2016[53] : int [1:10] 2 6 4 3 5 1 7 13 12 8
## $ 2015[54] : int [1:10] 3 2 5 4 6 1 7 8 9 9
## $ 2014[55] : int [1:10] 2 2 7 4 5 1 6 8 11 10
## $ 2013[56] : int [1:10] 2 4 7 3 5 1 6 9 11 8
## $ 2012[57] : int [1:10] 4 2 6 2 7 1 5 10 11 8
rating[,2:12]<-replace(rating[,2:12],rating[,2:12]=="-", NA)
rating
## # A tibble: 10 × 13
## Institution `2023[46]` `2022[47]` `2021[48]` `2020[49]` `2019[50]` `2018[51]`
## <chr> <int> <int> <int> <int> <int> <int>
## 1 University… 1 1 1 1 1 1
## 2 Harvard Un… 2 2 3 7 6 6
## 3 University… 3 5 6 3 2 2
## 4 Stanford U… 3 4 2 4 3 3
## 5 Massachuse… 5 5 5 5 4 5
## 6 California… 6 2 4 2 5 3
## 7 Princeton … 7 7 9 6 7 7
## 8 University… 8 8 7 13 15 18
## 9 Yale Unive… 9 9 8 8 8 12
## 10 Imperial C… 10 12 11 10 9 8
## # ℹ 6 more variables: `2017[52]` <int>, `2016[53]` <int>, `2015[54]` <int>,
## # `2014[55]` <int>, `2013[56]` <int>, `2012[57]` <int>
rating[,2:12] <- (lapply(rating[,2:12], as.numeric))
names(rating) <- c("University", "2010-2011", "2011-2012", "2012-2013", "2013-2014", "2014-2015", "2015-2016", "2016-2017", "2017-2018", "2018-2019", "2019-2020", "2020-2021")
## Warning: The `value` argument of `names<-` must have the same length as `x` as of tibble
## 3.0.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `value` argument of `names<-` can't be empty as of tibble 3.0.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
rmarkdown::paged_table(rating)
TOP 10
top10uni <- head(rating, 10)
top10uni
## # A tibble: 10 × 13
## University `2010-2011` `2011-2012` `2012-2013` `2013-2014` `2014-2015`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 University of Ox… 1 1 1 1 1
## 2 Harvard Universi… 2 2 3 7 6
## 3 University of Ca… 3 5 6 3 2
## 4 Stanford Univers… 3 4 2 4 3
## 5 Massachusetts In… 5 5 5 5 4
## 6 California Insti… 6 2 4 2 5
## 7 Princeton Univer… 7 7 9 6 7
## 8 University of Ca… 8 8 7 13 15
## 9 Yale University 9 9 8 8 8
## 10 Imperial College… 10 12 11 10 9
## # ℹ 7 more variables: `2015-2016` <dbl>, `2016-2017` <dbl>, `2017-2018` <dbl>,
## # `2018-2019` <dbl>, `2019-2020` <dbl>, `2020-2021` <dbl>, `` <int>
top10uni%>%
knitr:: kable()
| University | 2010-2011 | 2011-2012 | 2012-2013 | 2013-2014 | 2014-2015 | 2015-2016 | 2016-2017 | 2017-2018 | 2018-2019 | 2019-2020 | 2020-2021 | NA |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| University of Oxford | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 3 | 2 | 2 | 4 |
| Harvard University | 2 | 2 | 3 | 7 | 6 | 6 | 6 | 6 | 2 | 2 | 4 | 2 |
| University of Cambridge | 3 | 5 | 6 | 3 | 2 | 2 | 4 | 4 | 5 | 7 | 7 | 6 |
| Stanford University | 3 | 4 | 2 | 4 | 3 | 3 | 3 | 3 | 4 | 4 | 3 | 2 |
| Massachusetts Institute of Technology | 5 | 5 | 5 | 5 | 4 | 5 | 5 | 5 | 6 | 5 | 5 | 7 |
| California Institute of Technology | 6 | 2 | 4 | 2 | 5 | 3 | 2 | 1 | 1 | 1 | 1 | 1 |
| Princeton University | 7 | 7 | 9 | 6 | 7 | 7 | 7 | 7 | 7 | 6 | 6 | 5 |
| University of California, Berkeley | 8 | 8 | 7 | 13 | 15 | 18 | 10 | 13 | 8 | 8 | 9 | 10 |
| Yale University | 9 | 9 | 8 | 8 | 8 | 12 | 12 | 12 | 9 | 11 | 11 | 11 |
| Imperial College London | 10 | 12 | 11 | 10 | 9 | 8 | 8 | 8 | 9 | 10 | 8 | 8 |
top10uni2<-top10uni
top10uni <- top10uni[,-c(3:12)]
top10uni
## # A tibble: 10 × 3
## University `2010-2011` ``
## <chr> <dbl> <int>
## 1 University of Oxford 1 4
## 2 Harvard University 2 2
## 3 University of Cambridge 3 6
## 4 Stanford University 3 2
## 5 Massachusetts Institute of Technology 5 7
## 6 California Institute of Technology 6 1
## 7 Princeton University 7 5
## 8 University of California, Berkeley 8 10
## 9 Yale University 9 11
## 10 Imperial College London 10 8
year1 <- c("2011", "2011", "2011", "2011", "2011", "2011", "2011", "2011", "2011", "2011")
top10uni <- cbind(top10uni, year1)
top10uni
## University 2010-2011 NA year1
## 1 University of Oxford 1 4 2011
## 2 Harvard University 2 2 2011
## 3 University of Cambridge 3 6 2011
## 4 Stanford University 3 2 2011
## 5 Massachusetts Institute of Technology 5 7 2011
## 6 California Institute of Technology 6 1 2011
## 7 Princeton University 7 5 2011
## 8 University of California, Berkeley 8 10 2011
## 9 Yale University 9 11 2011
## 10 Imperial College London 10 8 2011
top10uni2 <- top10uni2[,-c(2:11)]
year2 <- c("2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021")
top10uni2 <- cbind(top10uni2, year2)
names(top10uni) <- c("uni", "rating", "year")
names(top10uni2) <- c("uni", "rating", "year")
topuni <- rbind(top10uni, top10uni2)
topuni%>%
knitr:: kable()
| uni | rating | year | NA |
|---|---|---|---|
| University of Oxford | 1 | 4 | 2011 |
| Harvard University | 2 | 2 | 2011 |
| University of Cambridge | 3 | 6 | 2011 |
| Stanford University | 3 | 2 | 2011 |
| Massachusetts Institute of Technology | 5 | 7 | 2011 |
| California Institute of Technology | 6 | 1 | 2011 |
| Princeton University | 7 | 5 | 2011 |
| University of California, Berkeley | 8 | 10 | 2011 |
| Yale University | 9 | 11 | 2011 |
| Imperial College London | 10 | 8 | 2011 |
| University of Oxford | 2 | 4 | 2021 |
| Harvard University | 4 | 2 | 2021 |
| University of Cambridge | 7 | 6 | 2021 |
| Stanford University | 3 | 2 | 2021 |
| Massachusetts Institute of Technology | 5 | 7 | 2021 |
| California Institute of Technology | 1 | 1 | 2021 |
| Princeton University | 6 | 5 | 2021 |
| University of California, Berkeley | 9 | 10 | 2021 |
| Yale University | 11 | 11 | 2021 |
| Imperial College London | 8 | 8 | 2021 |
topuni
## uni rating year NA
## 1 University of Oxford 1 4 2011
## 2 Harvard University 2 2 2011
## 3 University of Cambridge 3 6 2011
## 4 Stanford University 3 2 2011
## 5 Massachusetts Institute of Technology 5 7 2011
## 6 California Institute of Technology 6 1 2011
## 7 Princeton University 7 5 2011
## 8 University of California, Berkeley 8 10 2011
## 9 Yale University 9 11 2011
## 10 Imperial College London 10 8 2011
## 11 University of Oxford 2 4 2021
## 12 Harvard University 4 2 2021
## 13 University of Cambridge 7 6 2021
## 14 Stanford University 3 2 2021
## 15 Massachusetts Institute of Technology 5 7 2021
## 16 California Institute of Technology 1 1 2021
## 17 Princeton University 6 5 2021
## 18 University of California, Berkeley 9 10 2021
## 19 Yale University 11 11 2021
## 20 Imperial College London 8 8 2021