library(rvest)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(CGPfunctions)
wikidata <- "https://en.wikipedia.org/wiki/Times_Higher_Education_World_University_Rankings"
data1 <- wikidata%>%
  read_html()%>%
  html_table(fill=TRUE)
names(rmarkdown::paged_table(data1[[3]]))
##  [1] "Institution" "2023[46]"    "2022[47]"    "2021[48]"    "2020[49]"   
##  [6] "2019[50]"    "2018[51]"    "2017[52]"    "2016[53]"    "2015[54]"   
## [11] "2014[55]"    "2013[56]"    "2012[57]"
rmarkdown::paged_table(data1[[3]])

rating

rating<-data1[[3]]
rating
## # A tibble: 10 × 13
##    Institution `2023[46]` `2022[47]` `2021[48]` `2020[49]` `2019[50]` `2018[51]`
##    <chr>            <int>      <int>      <int>      <int>      <int>      <int>
##  1 University…          1          1          1          1          1          1
##  2 Harvard Un…          2          2          3          7          6          6
##  3 University…          3          5          6          3          2          2
##  4 Stanford U…          3          4          2          4          3          3
##  5 Massachuse…          5          5          5          5          4          5
##  6 California…          6          2          4          2          5          3
##  7 Princeton …          7          7          9          6          7          7
##  8 University…          8          8          7         13         15         18
##  9 Yale Unive…          9          9          8          8          8         12
## 10 Imperial C…         10         12         11         10          9          8
## # ℹ 6 more variables: `2017[52]` <int>, `2016[53]` <int>, `2015[54]` <int>,
## #   `2014[55]` <int>, `2013[56]` <int>, `2012[57]` <int>
summary(rating)
##  Institution           2023[46]        2022[47]        2021[48]    
##  Length:10          Min.   : 1.00   Min.   : 1.00   Min.   : 1.00  
##  Class :character   1st Qu.: 3.00   1st Qu.: 2.50   1st Qu.: 3.25  
##  Mode  :character   Median : 5.50   Median : 5.00   Median : 5.50  
##                     Mean   : 5.40   Mean   : 5.50   Mean   : 5.60  
##                     3rd Qu.: 7.75   3rd Qu.: 7.75   3rd Qu.: 7.75  
##                     Max.   :10.00   Max.   :12.00   Max.   :11.00  
##     2020[49]        2019[50]        2018[51]        2017[52]    
##  Min.   : 1.00   Min.   : 1.00   Min.   : 1.00   Min.   : 1.00  
##  1st Qu.: 3.25   1st Qu.: 3.25   1st Qu.: 3.00   1st Qu.: 3.25  
##  Median : 5.50   Median : 5.50   Median : 5.50   Median : 5.50  
##  Mean   : 5.90   Mean   : 6.00   Mean   : 6.50   Mean   : 5.80  
##  3rd Qu.: 7.75   3rd Qu.: 7.75   3rd Qu.: 7.75   3rd Qu.: 7.75  
##  Max.   :13.00   Max.   :15.00   Max.   :18.00   Max.   :12.00  
##     2016[53]        2015[54]       2014[55]        2013[56]        2012[57]    
##  Min.   : 1.00   Min.   :1.00   Min.   : 1.00   Min.   : 1.00   Min.   : 1.00  
##  1st Qu.: 3.25   1st Qu.:3.25   1st Qu.: 2.50   1st Qu.: 3.25   1st Qu.: 2.50  
##  Median : 5.50   Median :5.50   Median : 5.50   Median : 5.50   Median : 5.50  
##  Mean   : 6.10   Mean   :5.40   Mean   : 5.60   Mean   : 5.60   Mean   : 5.60  
##  3rd Qu.: 7.75   3rd Qu.:7.75   3rd Qu.: 7.75   3rd Qu.: 7.75   3rd Qu.: 7.75  
##  Max.   :13.00   Max.   :9.00   Max.   :11.00   Max.   :11.00   Max.   :11.00
str(rating)
## tibble [10 × 13] (S3: tbl_df/tbl/data.frame)
##  $ Institution: chr [1:10] "University of Oxford" "Harvard University" "University of Cambridge" "Stanford University" ...
##  $ 2023[46]   : int [1:10] 1 2 3 3 5 6 7 8 9 10
##  $ 2022[47]   : int [1:10] 1 2 5 4 5 2 7 8 9 12
##  $ 2021[48]   : int [1:10] 1 3 6 2 5 4 9 7 8 11
##  $ 2020[49]   : int [1:10] 1 7 3 4 5 2 6 13 8 10
##  $ 2019[50]   : int [1:10] 1 6 2 3 4 5 7 15 8 9
##  $ 2018[51]   : int [1:10] 1 6 2 3 5 3 7 18 12 8
##  $ 2017[52]   : int [1:10] 1 6 4 3 5 2 7 10 12 8
##  $ 2016[53]   : int [1:10] 2 6 4 3 5 1 7 13 12 8
##  $ 2015[54]   : int [1:10] 3 2 5 4 6 1 7 8 9 9
##  $ 2014[55]   : int [1:10] 2 2 7 4 5 1 6 8 11 10
##  $ 2013[56]   : int [1:10] 2 4 7 3 5 1 6 9 11 8
##  $ 2012[57]   : int [1:10] 4 2 6 2 7 1 5 10 11 8
rating[,2:12]<-replace(rating[,2:12],rating[,2:12]=="-", NA)
rating
## # A tibble: 10 × 13
##    Institution `2023[46]` `2022[47]` `2021[48]` `2020[49]` `2019[50]` `2018[51]`
##    <chr>            <int>      <int>      <int>      <int>      <int>      <int>
##  1 University…          1          1          1          1          1          1
##  2 Harvard Un…          2          2          3          7          6          6
##  3 University…          3          5          6          3          2          2
##  4 Stanford U…          3          4          2          4          3          3
##  5 Massachuse…          5          5          5          5          4          5
##  6 California…          6          2          4          2          5          3
##  7 Princeton …          7          7          9          6          7          7
##  8 University…          8          8          7         13         15         18
##  9 Yale Unive…          9          9          8          8          8         12
## 10 Imperial C…         10         12         11         10          9          8
## # ℹ 6 more variables: `2017[52]` <int>, `2016[53]` <int>, `2015[54]` <int>,
## #   `2014[55]` <int>, `2013[56]` <int>, `2012[57]` <int>
rating[,2:12] <- (lapply(rating[,2:12], as.numeric))
names(rating) <- c("University", "2010-2011", "2011-2012", "2012-2013", "2013-2014", "2014-2015", "2015-2016", "2016-2017", "2017-2018", "2018-2019", "2019-2020", "2020-2021")
## Warning: The `value` argument of `names<-` must have the same length as `x` as of tibble
## 3.0.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `value` argument of `names<-` can't be empty as of tibble 3.0.0.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
rmarkdown::paged_table(rating)

TOP 10

top10uni <- head(rating, 10)
top10uni
## # A tibble: 10 × 13
##    University        `2010-2011` `2011-2012` `2012-2013` `2013-2014` `2014-2015`
##    <chr>                   <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
##  1 University of Ox…           1           1           1           1           1
##  2 Harvard Universi…           2           2           3           7           6
##  3 University of Ca…           3           5           6           3           2
##  4 Stanford Univers…           3           4           2           4           3
##  5 Massachusetts In…           5           5           5           5           4
##  6 California Insti…           6           2           4           2           5
##  7 Princeton Univer…           7           7           9           6           7
##  8 University of Ca…           8           8           7          13          15
##  9 Yale University             9           9           8           8           8
## 10 Imperial College…          10          12          11          10           9
## # ℹ 7 more variables: `2015-2016` <dbl>, `2016-2017` <dbl>, `2017-2018` <dbl>,
## #   `2018-2019` <dbl>, `2019-2020` <dbl>, `2020-2021` <dbl>, `` <int>
top10uni%>%
  knitr:: kable()
University 2010-2011 2011-2012 2012-2013 2013-2014 2014-2015 2015-2016 2016-2017 2017-2018 2018-2019 2019-2020 2020-2021 NA
University of Oxford 1 1 1 1 1 1 1 2 3 2 2 4
Harvard University 2 2 3 7 6 6 6 6 2 2 4 2
University of Cambridge 3 5 6 3 2 2 4 4 5 7 7 6
Stanford University 3 4 2 4 3 3 3 3 4 4 3 2
Massachusetts Institute of Technology 5 5 5 5 4 5 5 5 6 5 5 7
California Institute of Technology 6 2 4 2 5 3 2 1 1 1 1 1
Princeton University 7 7 9 6 7 7 7 7 7 6 6 5
University of California, Berkeley 8 8 7 13 15 18 10 13 8 8 9 10
Yale University 9 9 8 8 8 12 12 12 9 11 11 11
Imperial College London 10 12 11 10 9 8 8 8 9 10 8 8
top10uni2<-top10uni

top10uni <- top10uni[,-c(3:12)] 
top10uni
## # A tibble: 10 × 3
##    University                            `2010-2011`    ``
##    <chr>                                       <dbl> <int>
##  1 University of Oxford                            1     4
##  2 Harvard University                              2     2
##  3 University of Cambridge                         3     6
##  4 Stanford University                             3     2
##  5 Massachusetts Institute of Technology           5     7
##  6 California Institute of Technology              6     1
##  7 Princeton University                            7     5
##  8 University of California, Berkeley              8    10
##  9 Yale University                                 9    11
## 10 Imperial College London                        10     8
year1 <- c("2011", "2011", "2011", "2011", "2011", "2011", "2011", "2011", "2011", "2011")
top10uni <- cbind(top10uni, year1)
top10uni
##                               University 2010-2011 NA year1
## 1                   University of Oxford         1  4  2011
## 2                     Harvard University         2  2  2011
## 3                University of Cambridge         3  6  2011
## 4                    Stanford University         3  2  2011
## 5  Massachusetts Institute of Technology         5  7  2011
## 6     California Institute of Technology         6  1  2011
## 7                   Princeton University         7  5  2011
## 8     University of California, Berkeley         8 10  2011
## 9                        Yale University         9 11  2011
## 10               Imperial College London        10  8  2011
top10uni2 <- top10uni2[,-c(2:11)] 
year2 <- c("2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021")
top10uni2 <- cbind(top10uni2, year2)

names(top10uni) <- c("uni", "rating", "year")
names(top10uni2) <- c("uni", "rating", "year")
topuni <- rbind(top10uni, top10uni2)

topuni%>%
  knitr:: kable()
uni rating year NA
University of Oxford 1 4 2011
Harvard University 2 2 2011
University of Cambridge 3 6 2011
Stanford University 3 2 2011
Massachusetts Institute of Technology 5 7 2011
California Institute of Technology 6 1 2011
Princeton University 7 5 2011
University of California, Berkeley 8 10 2011
Yale University 9 11 2011
Imperial College London 10 8 2011
University of Oxford 2 4 2021
Harvard University 4 2 2021
University of Cambridge 7 6 2021
Stanford University 3 2 2021
Massachusetts Institute of Technology 5 7 2021
California Institute of Technology 1 1 2021
Princeton University 6 5 2021
University of California, Berkeley 9 10 2021
Yale University 11 11 2021
Imperial College London 8 8 2021
topuni
##                                      uni rating year   NA
## 1                   University of Oxford      1    4 2011
## 2                     Harvard University      2    2 2011
## 3                University of Cambridge      3    6 2011
## 4                    Stanford University      3    2 2011
## 5  Massachusetts Institute of Technology      5    7 2011
## 6     California Institute of Technology      6    1 2011
## 7                   Princeton University      7    5 2011
## 8     University of California, Berkeley      8   10 2011
## 9                        Yale University      9   11 2011
## 10               Imperial College London     10    8 2011
## 11                  University of Oxford      2    4 2021
## 12                    Harvard University      4    2 2021
## 13               University of Cambridge      7    6 2021
## 14                   Stanford University      3    2 2021
## 15 Massachusetts Institute of Technology      5    7 2021
## 16    California Institute of Technology      1    1 2021
## 17                  Princeton University      6    5 2021
## 18    University of California, Berkeley      9   10 2021
## 19                       Yale University     11   11 2021
## 20               Imperial College London      8    8 2021