Notes Mar 24

Harold Nelson

3/22/2021

Setup

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.4
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Get the Data

# Read the TidyTuesday 2021-03-16 games.csv straight from GitHub into `games`.
# No col_types are supplied, so readr guesses them and prints the column spec.
games <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-16/games.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   gamename = col_character(),
##   year = col_double(),
##   month = col_character(),
##   avg = col_double(),
##   gain = col_double(),
##   peak = col_double(),
##   avg_peak_perc = col_character()
## )
# Compact overview of `games`: row/column counts, column types, first values.
games %>% glimpse()
## Rows: 83,631
## Columns: 7
## $ gamename      <chr> "Counter-Strike: Global Offensive", "Dota 2", "PLAYERUN…
## $ year          <dbl> 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2021, 2…
## $ month         <chr> "February", "February", "February", "February", "Februa…
## $ avg           <dbl> 741013.24, 404832.13, 198957.52, 120982.64, 117742.27, …
## $ gain          <dbl> -2196.42, -27839.52, -2289.67, 49215.90, -24374.98, 180…
## $ peak          <dbl> 1123485, 651615, 447390, 196799, 224276, 133620, 146438…
## $ avg_peak_perc <chr> "65.9567%", "62.1275%", "44.4707%", "61.4752%", "52.498…
# Number of rows recorded for each year.
table(games[["year"]])
## 
##  2012  2013  2014  2015  2016  2017  2018  2019  2020  2021 
##  1419  3957  5668  7688  9838 11676 12765 13622 14486  2512
# Number of rows recorded for each month name (sorted alphabetically).
table(games[["month"]])
## 
##     April    August  December  February   January      July      June     March 
##      6435      6978      7391      7521      7447      6873      6602      6357 
##       May  November   October September 
##      6509      7276      7179      7063

Make a Date

Answer

# Add a Date column set to the first day of each record's month.
# The month number is looked up in base R's English `month.name` constant
# instead of being parsed from text with mdy(): mdy() matches month names
# using the session's LC_TIME locale, so it can fail to parse "February" etc.
# when the session is not running in an English locale.
games <- games %>%
  mutate(Date = make_date(year, match(month, month.name), 1L))

head(games)
## # A tibble: 6 x 8
##   gamename            year month     avg    gain   peak avg_peak_perc Date      
##   <chr>              <dbl> <chr>   <dbl>   <dbl>  <dbl> <chr>         <date>    
## 1 Counter-Strike: G…  2021 Febru… 7.41e5  -2196. 1.12e6 65.9567%      2021-02-01
## 2 Dota 2              2021 Febru… 4.05e5 -27840. 6.52e5 62.1275%      2021-02-01
## 3 PLAYERUNKNOWN'S B…  2021 Febru… 1.99e5  -2290. 4.47e5 44.4707%      2021-02-01
## 4 Apex Legends        2021 Febru… 1.21e5  49216. 1.97e5 61.4752%      2021-02-01
## 5 Rust                2021 Febru… 1.18e5 -24375. 2.24e5 52.4988%      2021-02-01
## 6 Team Fortress 2     2021 Febru… 1.01e5  18083. 1.34e5 75.7603%      2021-02-01

All Time Leaders

Create a dataframe All_time with the names of the 10 games that have the greatest all-time sum of avg.

Answer

# Total the `avg` values of each game over all of its records, order the
# totals from largest to smallest, and keep the ten largest.
All_time <- games %>%
  group_by(gamename) %>%
  summarize(sum_avg = sum(avg)) %>%
  ungroup() %>%
  arrange(-sum_avg) %>%
  head(n = 10)

All_time
## # A tibble: 10 x 2
##    gamename                           sum_avg
##    <chr>                                <dbl>
##  1 Dota 2                           47188121.
##  2 Counter-Strike: Global Offensive 32099063.
##  3 PLAYERUNKNOWN'S BATTLEGROUNDS    23137194.
##  4 Team Fortress 2                   5500066.
##  5 Grand Theft Auto V                4545350.
##  6 Tom Clancy's Rainbow Six Siege    3322836.
##  7 Warframe                          3214114.
##  8 ARK: Survival Evolved             2950450.
##  9 Sid Meier's Civilization V        2899576.
## 10 Rust                              2868306.

History

Create a dataframe All_time_hist with the complete records of the games in the All_time dataframe.

Answer

# Pull every record in `games` for the ten All_time games, keeping the
# All_time ranking order of the names.
# The join key is named explicitly so the join cannot silently pick up any
# other shared column; with `by` given, dplyr prints no "Joining, by" message.
All_time_hist <- All_time %>%
  select(gamename) %>%
  left_join(games, by = "gamename")
# Compact overview of the joined history: dimensions, types, first values.
All_time_hist %>% glimpse()
## Rows: 853
## Columns: 8
## $ gamename      <chr> "Dota 2", "Dota 2", "Dota 2", "Dota 2", "Dota 2", "Dota…
## $ year          <dbl> 2021, 2021, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2…
## $ month         <chr> "February", "January", "December", "November", "October…
## $ avg           <dbl> 404832.1, 432671.7, 422552.3, 425904.8, 406361.4, 40860…
## $ gain          <dbl> -27839.52, 10119.33, -3352.52, 19543.48, -2248.43, -211…
## $ peak          <dbl> 651615, 694613, 697833, 711824, 723280, 670547, 666138,…
## $ avg_peak_perc <chr> "62.1275%", "62.2896%", "60.5521%", "59.8329%", "56.183…
## $ Date          <date> 2021-02-01, 2021-01-01, 2020-12-01, 2020-11-01, 2020-1…

Which months?

Show the months in which these games were listed.

Answer

# For each month, count how many of the All_time games have a record.
table(All_time_hist[["Date"]])
## 
## 2012-07-01 2012-08-01 2012-09-01 2012-10-01 2012-11-01 2012-12-01 2013-01-01 
##          4          4          4          4          4          4          5 
## 2013-02-01 2013-03-01 2013-04-01 2013-05-01 2013-06-01 2013-07-01 2013-08-01 
##          5          5          5          5          5          5          5 
## 2013-09-01 2013-10-01 2013-11-01 2013-12-01 2014-01-01 2014-02-01 2014-03-01 
##          5          5          5          6          6          6          6 
## 2014-04-01 2014-05-01 2014-06-01 2014-07-01 2014-08-01 2014-09-01 2014-10-01 
##          6          6          6          6          6          6          6 
## 2014-11-01 2014-12-01 2015-01-01 2015-02-01 2015-03-01 2015-04-01 2015-05-01 
##          6          6          6          6          6          7          8 
## 2015-06-01 2015-07-01 2015-08-01 2015-09-01 2015-10-01 2015-11-01 2015-12-01 
##          8          8          8          8          8          8          9 
## 2016-01-01 2016-02-01 2016-03-01 2016-04-01 2016-05-01 2016-06-01 2016-07-01 
##          9          9          9          9          9          9          9 
## 2016-08-01 2016-09-01 2016-10-01 2016-11-01 2016-12-01 2017-01-01 2017-02-01 
##          9          9          9          9          9          9          9 
## 2017-03-01 2017-04-01 2017-05-01 2017-06-01 2017-07-01 2017-08-01 2017-09-01 
##         10         10         10         10         10         10         10 
## 2017-10-01 2017-11-01 2017-12-01 2018-01-01 2018-02-01 2018-03-01 2018-04-01 
##         10         10         10         10         10         10         10 
## 2018-05-01 2018-06-01 2018-07-01 2018-08-01 2018-09-01 2018-10-01 2018-11-01 
##         10         10         10         10         10         10         10 
## 2018-12-01 2019-01-01 2019-02-01 2019-03-01 2019-04-01 2019-05-01 2019-06-01 
##         10         10         10         10         10         10         10 
## 2019-07-01 2019-08-01 2019-09-01 2019-10-01 2019-11-01 2019-12-01 2020-01-01 
##         10         10         10         10         10         10         10 
## 2020-02-01 2020-03-01 2020-04-01 2020-05-01 2020-06-01 2020-07-01 2020-08-01 
##         10         10         10         10         10         10         10 
## 2020-09-01 2020-10-01 2020-11-01 2020-12-01 2021-01-01 2021-02-01 
##         10         10         10         10         10         10

From the Beginning

Get a dataframe July12 of the top 10 games, ranked by avg, in July of 2012.

Answer

# Restrict to the July 2012 records, rank them by `avg` from highest to
# lowest, and keep the first ten.
July12 <- games %>%
  filter(Date == as.Date("2012-07-01")) %>%
  arrange(-avg) %>%
  head(n = 10)

July12
## # A tibble: 10 x 8
##    gamename               year month    avg  gain  peak avg_peak_perc Date      
##    <chr>                 <dbl> <chr>  <dbl> <dbl> <dbl> <chr>         <date>    
##  1 Dota 2                 2012 July  52721.    NA 75041 70.2563%      2012-07-01
##  2 Team Fortress 2        2012 July  45567.    NA 62321 73.1164%      2012-07-01
##  3 Counter-Strike         2012 July  34139.    NA 53967 63.2594%      2012-07-01
##  4 Football Manager 2012  2012 July  30731.    NA 60437 50.8476%      2012-07-01
##  5 Counter-Strike: Sour…  2012 July  27970.    NA 47909 58.3805%      2012-07-01
##  6 Sid Meier's Civiliza…  2012 July  27448.    NA 50756 54.0792%      2012-07-01
##  7 Left 4 Dead 2          2012 July  12228.    NA 21811 56.0656%      2012-07-01
##  8 Terraria               2012 July   9023.    NA 16097 56.0525%      2012-07-01
##  9 Garry's Mod            2012 July   7966.    NA 12253 65.0122%      2012-07-01
## 10 Fallout: New Vegas     2012 July   7160.    NA 12756 56.1281%      2012-07-01

Fate of the July 2012 Group

Get July12_hist, which has the complete records of the group.

Answer

# Pull every record in `games` for the ten July12 games.
# The join key is named explicitly so the join cannot silently pick up any
# other shared column; with `by` given, dplyr prints no "Joining, by" message.
July12_hist <- July12 %>%
  select(gamename) %>%
  left_join(games, by = "gamename")
# For each month, count how many of the July12 games have a record.
table(July12_hist[["Date"]])
## 
## 2012-07-01 2012-08-01 2012-09-01 2012-10-01 2012-11-01 2012-12-01 2013-01-01 
##         10         10         10         10         10         10         10 
## 2013-02-01 2013-03-01 2013-04-01 2013-05-01 2013-06-01 2013-07-01 2013-08-01 
##         10         10         10         10         10         10         10 
## 2013-09-01 2013-10-01 2013-11-01 2013-12-01 2014-01-01 2014-02-01 2014-03-01 
##         10         10         10         10         10         10         10 
## 2014-04-01 2014-05-01 2014-06-01 2014-07-01 2014-08-01 2014-09-01 2014-10-01 
##         10         10         10         10         10         10         10 
## 2014-11-01 2014-12-01 2015-01-01 2015-02-01 2015-03-01 2015-04-01 2015-05-01 
##         10         10         10         10         10         10         10 
## 2015-06-01 2015-07-01 2015-08-01 2015-09-01 2015-10-01 2015-11-01 2015-12-01 
##         10         10         10         10         10         10         10 
## 2016-01-01 2016-02-01 2016-03-01 2016-04-01 2016-05-01 2016-06-01 2016-07-01 
##         10         10         10         10         10         10         10 
## 2016-08-01 2016-09-01 2016-10-01 2016-11-01 2016-12-01 2017-01-01 2017-02-01 
##         10         10         10         10         10         10         10 
## 2017-03-01 2017-04-01 2017-05-01 2017-06-01 2017-07-01 2017-08-01 2017-09-01 
##         10         10         10         10         10         10         10 
## 2017-10-01 2017-11-01 2017-12-01 2018-01-01 2018-02-01 2018-03-01 2018-04-01 
##         10         10         10         10         10         10         10 
## 2018-05-01 2018-06-01 2018-07-01 2018-08-01 2018-09-01 2018-10-01 2018-11-01 
##         10         10         10         10         10         10         10 
## 2018-12-01 2019-01-01 2019-02-01 2019-03-01 2019-04-01 2019-05-01 2019-06-01 
##         10         10         10         10         10         10         10 
## 2019-07-01 2019-08-01 2019-09-01 2019-10-01 2019-11-01 2019-12-01 2020-01-01 
##         10         10         10         10         10         10         10 
## 2020-02-01 2020-03-01 2020-04-01 2020-05-01 2020-06-01 2020-07-01 2020-08-01 
##         10         10         10         10         10         10         10 
## 2020-09-01 2020-10-01 2020-11-01 2020-12-01 2021-01-01 2021-02-01 
##         10         10         10         10         10         10

July12 and All_time.

How would you systematically check to see how many of the July12 group are on the All_time list?

Answer

# List the July12 games that also appear in All_time, with their totals.
# An inner join keeps exactly the names present in both tables. Unlike a
# left join followed by filter(!is.na(sum_avg)), it does not drop a genuine
# match whose sum_avg happens to be NA.
July12 %>%
  select(gamename) %>%
  inner_join(All_time, by = "gamename")
## # A tibble: 3 x 2
##   gamename                     sum_avg
##   <chr>                          <dbl>
## 1 Dota 2                     47188121.
## 2 Team Fortress 2             5500066.
## 3 Sid Meier's Civilization V  2899576.