1

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the Olympics dataset from a CSV file under the user's home directory.
# NOTE(review): the path is specific to one machine; adjust it before
# running elsewhere.
dataset_olympics <- read.csv("~/School/Data 101/dataset_olympics.csv")

2

# Number of rows and columns in the full dataset.
dim(dataset_olympics)
## [1] 70000    15
# Per-column summary: quartiles, mean and NA count for numeric columns;
# length and class for character columns.
summary(dataset_olympics)
##        ID            Name               Sex                 Age       
##  Min.   :    1   Length:70000       Length:70000       Min.   :11.00  
##  1st Qu.: 9326   Class :character   Class :character   1st Qu.:21.00  
##  Median :18032   Mode  :character   Mode  :character   Median :25.00  
##  Mean   :18082                                         Mean   :25.64  
##  3rd Qu.:26978                                         3rd Qu.:28.00  
##  Max.   :35658                                         Max.   :88.00  
##                                                        NA's   :2732   
##      Height          Weight          Team               NOC           
##  Min.   :127.0   Min.   : 25.0   Length:70000       Length:70000      
##  1st Qu.:168.0   1st Qu.: 61.0   Class :character   Class :character  
##  Median :175.0   Median : 70.0   Mode  :character   Mode  :character  
##  Mean   :175.5   Mean   : 70.9                                        
##  3rd Qu.:183.0   3rd Qu.: 79.0                                        
##  Max.   :223.0   Max.   :214.0                                        
##  NA's   :16254   NA's   :17101                                        
##     Games                Year         Season              City          
##  Length:70000       Min.   :1896   Length:70000       Length:70000      
##  Class :character   1st Qu.:1960   Class :character   Class :character  
##  Mode  :character   Median :1984   Mode  :character   Mode  :character  
##                     Mean   :1978                                        
##                     3rd Qu.:2002                                        
##                     Max.   :2016                                        
##                                                                         
##     Sport              Event              Medal          
##  Length:70000       Length:70000       Length:70000      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
## 
# Compact overview of every column: its type and its first few values.
str(dataset_olympics)
## 'data.frame':    70000 obs. of  15 variables:
##  $ ID    : int  1 2 3 4 5 5 5 5 5 5 ...
##  $ Name  : chr  "A Dijiang" "A Lamusi" "Gunnar Nielsen Aaby" "Edgar Lindenau Aabye" ...
##  $ Sex   : chr  "M" "M" "M" "M" ...
##  $ Age   : num  24 23 24 34 21 21 25 25 27 27 ...
##  $ Height: num  180 170 NA NA 185 185 185 185 185 185 ...
##  $ Weight: num  80 60 NA NA 82 82 82 82 82 82 ...
##  $ Team  : chr  "China" "China" "Denmark" "Denmark/Sweden" ...
##  $ NOC   : chr  "CHN" "CHN" "DEN" "DEN" ...
##  $ Games : chr  "1992 Summer" "2012 Summer" "1920 Summer" "1900 Summer" ...
##  $ Year  : int  1992 2012 1920 1900 1988 1988 1992 1992 1994 1994 ...
##  $ Season: chr  "Summer" "Summer" "Summer" "Summer" ...
##  $ City  : chr  "Barcelona" "London" "Antwerpen" "Paris" ...
##  $ Sport : chr  "Basketball" "Judo" "Football" "Tug-Of-War" ...
##  $ Event : chr  "Basketball Men's Basketball" "Judo Men's Extra-Lightweight" "Football Men's Football" "Tug-Of-War Men's Tug-Of-War" ...
##  $ Medal : chr  "" "" "" "Gold" ...
# Count the NA values in each column. Empty strings are not NA, so a
# character column that holds "" for "no value" (as Medal does) reports 0.
colSums(is.na(dataset_olympics))
##     ID   Name    Sex    Age Height Weight   Team    NOC  Games   Year Season 
##      0      0      0   2732  16254  17101      0      0      0      0      0 
##   City  Sport  Event  Medal 
##      0      0      0      0

Columns with missing values (NA): Age: 2732, Height: 16254, Weight: 17101.

3

# Show the first ten rows with City moved to the front; the remaining
# columns keep their original relative order.
dataset_olympics %>%
  relocate(City) %>%
  head(10)
##           City ID                     Name Sex Age Height Weight           Team
## 1    Barcelona  1                A Dijiang   M  24    180     80          China
## 2       London  2                 A Lamusi   M  23    170     60          China
## 3    Antwerpen  3      Gunnar Nielsen Aaby   M  24     NA     NA        Denmark
## 4        Paris  4     Edgar Lindenau Aabye   M  34     NA     NA Denmark/Sweden
## 5      Calgary  5 Christine Jacoba Aaftink   F  21    185     82    Netherlands
## 6      Calgary  5 Christine Jacoba Aaftink   F  21    185     82    Netherlands
## 7  Albertville  5 Christine Jacoba Aaftink   F  25    185     82    Netherlands
## 8  Albertville  5 Christine Jacoba Aaftink   F  25    185     82    Netherlands
## 9  Lillehammer  5 Christine Jacoba Aaftink   F  27    185     82    Netherlands
## 10 Lillehammer  5 Christine Jacoba Aaftink   F  27    185     82    Netherlands
##    NOC       Games Year Season         Sport                              Event
## 1  CHN 1992 Summer 1992 Summer    Basketball        Basketball Men's Basketball
## 2  CHN 2012 Summer 2012 Summer          Judo       Judo Men's Extra-Lightweight
## 3  DEN 1920 Summer 1920 Summer      Football            Football Men's Football
## 4  DEN 1900 Summer 1900 Summer    Tug-Of-War        Tug-Of-War Men's Tug-Of-War
## 5  NED 1988 Winter 1988 Winter Speed Skating   Speed Skating Women's 500 metres
## 6  NED 1988 Winter 1988 Winter Speed Skating Speed Skating Women's 1,000 metres
## 7  NED 1992 Winter 1992 Winter Speed Skating   Speed Skating Women's 500 metres
## 8  NED 1992 Winter 1992 Winter Speed Skating Speed Skating Women's 1,000 metres
## 9  NED 1994 Winter 1994 Winter Speed Skating   Speed Skating Women's 500 metres
## 10 NED 1994 Winter 1994 Winter Speed Skating Speed Skating Women's 1,000 metres
##    Medal
## 1       
## 2       
## 3       
## 4   Gold
## 5       
## 6       
## 7       
## 8       
## 9       
## 10

4

# Keep only the rows from the 1968 Summer Games, then report the size of
# the resulting subset (rows, columns).
summer68 <- filter(dataset_olympics, Year == 1968 & Season == "Summer")
dim(summer68)
## [1] 2315   15

5

# Tally the 1968 Summer rows per team, sorted by team name.
# n()/count() count rows, and one athlete occupies one row per event
# entered, so Num_Athletes is a count of entries.
# NOTE(review): if unique athletes are wanted, count distinct IDs instead.
ts <- summer68 %>%
  as_tibble() %>%
  count(Team, name = "Num_Athletes")
ts
## # A tibble: 98 × 2
##    Team        Num_Athletes
##    <chr>              <int>
##  1 Afghanistan            5
##  2 Argentina             43
##  3 Australia             82
##  4 Austria               11
##  5 Bahamas                3
##  6 Barbados               3
##  7 Belgium               29
##  8 Belize                 1
##  9 Bermuda                2
## 10 Bolivia                1
## # ℹ 88 more rows

United States

6

# Overall mean age in the 1968 Summer subset, ignoring missing ages.
ma <- mean(summer68[["Age"]], na.rm = TRUE)
ma
## [1] 24.32589
# Mean age within each sex, again skipping missing ages.
ma2 <- summarise(
  group_by(summer68, Sex),
  Mean_Age = mean(Age, na.rm = TRUE)
)
ma2
## # A tibble: 2 × 2
##   Sex   Mean_Age
##   <chr>    <dbl>
## 1 F         20.4
## 2 M         25.4

Yes, there is about a 5-year difference: the mean age is 25.4 for men versus 20.4 for women.

7

# Recode empty strings as NA in every character column, so that "no medal"
# (stored as "") can be detected with is.na().
summer68 <- summer68 %>%
  mutate(across(where(is.character), ~ na_if(.x, "")))

# Medal-winning entries for the United States team: athlete, event, medal.
medals <- summer68 %>%
  filter(Team == "United States" & !is.na(Medal)) %>%
  select(Name, Event, Medal)
medals
##                                          Name
## 1                           Gary Lee Anderson
## 2              Margaret Ann Bailes (Johnson-)
## 3                   John Lee "Johnny" Baldwin
## 4  Catherine Northcutt "Catie" Ball (-Condon)
## 5                Jane Louise Barkman (-Brown)
## 6                Jane Louise Barkman (-Brown)
## 7               Michael Thomas "Mike" Barrett
## 8                         Peter Jones Barrett
## 9                         Robert "Bob" Beamon
## 10                      Donald Ray "Don" Behm
## 11                        Ralph Harold Boston
## 12           Gregory Fenton "Greg" Buckingham
## 13                  Michael Jay "Mike" Burton
## 14                  Michael Jay "Mike" Burton
## 15                         John Wesley Carlos
## 16          Edward Julius "Ed" Caruthers, Jr.
## 17                       John Richard Clawson
## 18     Eleanor Suzanne "Ellie" Daniel (-Drye)
## 19     Eleanor Suzanne "Ellie" Daniel (-Drye)
## 20     Eleanor Suzanne "Ellie" Daniel (-Drye)
## 21               Wilbur D. "Willie" Davenport
## 22                   Donald Francis "Don" Dee
## 23             Joseph Douglas "Joe" Dube, Sr.
## 24                           Lee Edward Evans
## 25                           Lee Edward Evans
## 26               Thomas Francis "Tom" Farrell
## 27            Barbara Ann Ferrell (-Edmonson)
## 28            Barbara Ann Ferrell (-Edmonson)
## 29                         John Edward Ferris
## 30                         John Edward Ferris
##                                                     Event  Medal
## 1  Shooting Mixed Free Rifle, Three Positions, 300 metres   Gold
## 2                  Athletics Women's 4 x 100 metres Relay   Gold
## 3                         Boxing Men's Light-Middleweight Bronze
## 4            Swimming Women's 4 x 100 metres Medley Relay   Gold
## 5                   Swimming Women's 200 metres Freestyle Bronze
## 6         Swimming Women's 4 x 100 metres Freestyle Relay   Gold
## 7                             Basketball Men's Basketball   Gold
## 8                       Sailing Mixed Two Person Keelboat   Gold
## 9                               Athletics Men's Long Jump   Gold
## 10                Wrestling Men's Bantamweight, Freestyle Silver
## 11                              Athletics Men's Long Jump Bronze
## 12            Swimming Men's 200 metres Individual Medley Silver
## 13                    Swimming Men's 400 metres Freestyle   Gold
## 14                  Swimming Men's 1,500 metres Freestyle   Gold
## 15                             Athletics Men's 200 metres Bronze
## 16                              Athletics Men's High Jump Silver
## 17                            Basketball Men's Basketball   Gold
## 18                  Swimming Women's 100 metres Butterfly Silver
## 19                  Swimming Women's 200 metres Butterfly Bronze
## 20           Swimming Women's 4 x 100 metres Medley Relay   Gold
## 21                     Athletics Men's 110 metres Hurdles   Gold
## 22                            Basketball Men's Basketball   Gold
## 23                        Weightlifting Men's Heavyweight Bronze
## 24                             Athletics Men's 400 metres   Gold
## 25                   Athletics Men's 4 x 400 metres Relay   Gold
## 26                             Athletics Men's 800 metres Bronze
## 27                           Athletics Women's 100 metres Silver
## 28                 Athletics Women's 4 x 100 metres Relay   Gold
## 29                    Swimming Men's 200 metres Butterfly Bronze
## 30            Swimming Men's 200 metres Individual Medley Bronze

8

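You could use as.factor() to convert it to a factor in order to sort it. To control the sort order rather than getting the default alphabetical one, use factor() with explicit levels.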

9

# Add a body-mass-index column: weight divided by squared height, with
# height rescaled by 1/100 first.
# NOTE(review): assumes Height is in centimetres and Weight in kilograms;
# confirm against the dataset's documentation.
summer68$BMI <- with(summer68, Weight / (Height / 100)^2)

# Extremes and mean of BMI over the rows where it could be computed
# (rows with a missing Height or Weight yield NA and are dropped).
bmi_values <- summer68[["BMI"]]
bmi_range <- range(bmi_values, na.rm = TRUE)
max_BMI <- bmi_range[2]
min_BMI <- bmi_range[1]
mean_BMI <- mean(bmi_values, na.rm = TRUE)

max_BMI
## [1] 43.5964
min_BMI
## [1] 16.56065
mean_BMI
## [1] 22.68064