# Read the TidyTuesday (2022-01-25) ratings CSV directly from GitHub.
# `show_col_types = FALSE` keeps readr from printing the guessed column types.
rating <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2022/2022-01-25/ratings.csv",
  show_col_types = FALSE
)
print(rating)
## # A tibble: 21,831 × 10
## num id name year rank average bayes_average users_rated url
## <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 105 30549 Pandemic 2008 106 7.59 7.49 108975 /boa…
## 2 189 822 Carcassonne 2000 190 7.42 7.31 108738 /boa…
## 3 428 13 Catan 1995 429 7.14 6.97 108024 /boa…
## 4 72 68448 7 Wonders 2010 73 7.74 7.63 89982 /boa…
## 5 103 36218 Dominion 2008 104 7.61 7.50 81561 /boa…
## 6 191 9209 Ticket to R… 2004 192 7.41 7.30 76171 /boa…
## 7 100 178900 Codenames 2015 101 7.6 7.51 74419 /boa…
## 8 3 167791 Terraformin… 2016 4 8.42 8.27 74216 /boa…
## 9 15 173346 7 Wonders D… 2015 16 8.11 7.98 69472 /boa…
## 10 35 31260 Agricola 2007 36 7.93 7.81 66093 /boa…
## # ℹ 21,821 more rows
## # ℹ 1 more variable: thumbnail <chr>
# Work with a small sample: the first 50 rows, keeping only the contiguous
# columns from `name` through `rank` (name, year, rank).
ratings <- rating %>%
  head(50) %>%
  select(name:rank)
print(ratings)
## # A tibble: 50 × 3
## name year rank
## <chr> <dbl> <dbl>
## 1 Pandemic 2008 106
## 2 Carcassonne 2000 190
## 3 Catan 1995 429
## 4 7 Wonders 2010 73
## 5 Dominion 2008 104
## 6 Ticket to Ride 2004 192
## 7 Codenames 2015 101
## 8 Terraforming Mars 2016 4
## 9 7 Wonders Duel 2015 16
## 10 Agricola 2007 36
## # ℹ 40 more rows
# Show the `year` column as a plain numeric vector.
ratings[["year"]]
## [1] 2008 2000 1995 2010 2008 2004 2015 2016 2015 2007 2002 2014 2016 2009 2005
## [16] 2017 2011 2012 2004 2019 2008 2014 2011 2000 2007 2012 2017 2008 2015 2010
## [31] 2001 2005 2012 2010 2014 2009 1997 2004 1999 2012 2014 2013 2011 2015 2005
## [46] 2009 2012 2016 2015 2015
# Flag years in the 1900s. The pattern is anchored at both ends so that only
# four-digit values starting with "19" match. An unanchored "19." skips 2019
# merely because nothing follows its "19", and it would still flag values
# such as 1190 or 2190 where "19" appears mid-string.
# `year` is numeric; stringr coerces it to character before matching.
str_detect(ratings$year, "^19\\d{2}$")
## [1] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE
# Count the years in the 1900s: summing the logical vector counts the TRUEs.
# The pattern is anchored (^ ... $) so "19" cannot match mid-string
# (e.g. 1190 or 2190) and 2019 is excluded by construction.
sum(str_detect(ratings$year, "^19\\d{2}$"))
## [1] 3
# Same count, expressed as a dplyr summary column.
# The anchored pattern matches only four-digit years of the form 19xx, so
# values like 2019, 1190 or 2190 are never counted.
ratings %>%
  summarise(amt_1900s = sum(str_detect(year, "^19\\d{2}$")))
## # A tibble: 1 × 1
## amt_1900s
## <int>
## 1 3
# Extract the first three characters ("19" plus the decade digit) from each
# 1900s year, then keep only the rows where something was extracted
# (str_extract() returns NA when the pattern does not match).
# `^` pins the match to the start of the year and the lookahead `(?=\\d$)`
# requires exactly one more digit after it, so only four-digit 19xx years
# match; a "19x" in the middle of another number is ignored.
ratings %>%
  mutate(col_1900s = str_extract(year, "^19\\d(?=\\d$)")) %>%
  filter(!is.na(col_1900s))
## # A tibble: 3 × 4
## name year rank col_1900s
## <chr> <dbl> <dbl> <chr>
## 1 Catan 1995 429 199
## 2 Bohnanza 1997 473 199
## 3 Lost Cities 1999 324 199
# Rewrite every 1900s year so its first three digits become "200"
# (e.g. 1995 -> "2005"); all other years pass through unchanged. The result
# is a character column because str_replace() works on strings.
# The pattern is anchored at the start and uses the lookahead `(?=\\d$)` to
# insist on exactly one trailing digit, so only four-digit 19xx years are
# rewritten. An unanchored "19." would also alter values such as 2190.
ratings %>%
  mutate(y2k_killed_1900s = str_replace(year, "^19\\d(?=\\d$)", "200")) %>%
  select(name, rank:y2k_killed_1900s)
## # A tibble: 50 × 3
## name rank y2k_killed_1900s
## <chr> <dbl> <chr>
## 1 Pandemic 106 2008
## 2 Carcassonne 190 2000
## 3 Catan 429 2005
## 4 7 Wonders 73 2010
## 5 Dominion 104 2008
## 6 Ticket to Ride 192 2004
## 7 Codenames 101 2015
## 8 Terraforming Mars 4 2016
## 9 7 Wonders Duel 16 2015
## 10 Agricola 36 2007
## # ℹ 40 more rows