Assignment 2 (Part 1)
Using the Lahman baseball database (library(Lahman)), get the number of
distinct players in the HallOfFame data set.
library(Lahman)
# Reduce the playerID column to one entry per player, then count the entries.
# Evaluating the bare name on the last line prints the count.
distinct_players <- with(HallOfFame, playerID[!duplicated(playerID)])
number_distinct_players <- NROW(distinct_players)
number_distinct_players
## [1] 1344
Find distinct players (category == “Player”) in HallOfFame with
inducted == “Y”
# Keep only rows for people balloted as players who were inducted.
# which() drops rows where the condition is NA, matching subset().
inducted_players <- HallOfFame[
  which(HallOfFame$inducted == "Y" & HallOfFame$category == "Player"),
]
# Count each inducted player once.
unique_inducted_players <- unique(inducted_players[["playerID"]])
number_inducted_players <- NROW(unique_inducted_players)
number_inducted_players
## [1] 270
Create a HOFplayers data frame for players (category ==
“Player”)
voted by BBWAA
# Restrict the ballots to players voted on by the BBWAA.
# which() drops rows where the condition is NA, matching subset().
HOF_players <- HallOfFame[
  which(HallOfFame$votedBy == "BBWAA" & HallOfFame$category == "Player"),
]
# Confirm the result is still a data frame.
class(HOF_players)
## [1] "data.frame"
Remove all NAs from your HOFplayers$votes column.
Bonus: also remove all zeros from your
HOFplayers$votes column.
# Drop every row whose vote count is missing or zero, then show the last rows.
HOF_players <- HOF_players[
  !(is.na(HOF_players$votes) | HOF_players$votes == 0),
]
tail(HOF_players, n = 6L)
## playerID yearID votedBy ballots needed votes inducted category
## 4310 howarry01 2022 BBWAA 394 296 8 N Player
## 4311 teixema01 2022 BBWAA 394 296 6 N Player
## 4312 papeljo01 2022 BBWAA 394 296 5 N Player
## 4313 morneju01 2022 BBWAA 394 296 5 N Player
## 4314 pierzaj01 2022 BBWAA 394 296 2 N Player
## 4315 fieldpr01 2022 BBWAA 394 296 2 N Player
## needed_note
## 4310 <NA>
## 4311 <NA>
## 4312 <NA>
## 4313 <NA>
## 4314 <NA>
## 4315 <NA>
Assignment 2 (Part 2)
Create a function which returns “Elected” if inducted == “Y” and
“Not elected” otherwise
#' Translate Hall of Fame induction flags into readable labels
#'
#' Vectorised: accepts a single flag or a whole column, so it can be applied
#' element-by-element or called directly on a data-frame column.
#'
#' @param inducted A character vector or factor of induction flags, where
#'   "Y" means inducted.
#' @return A character vector the same length as `inducted`: "Elected" where
#'   the flag is "Y", "Not elected" for any other value, and `NA` where the
#'   flag is missing.
election_status <- function(inducted) {
  # Compare on the labels so factors and character vectors behave alike.
  is_elected <- as.character(inducted) == "Y"
  # as.character() keeps the result a character vector even for empty input,
  # where ifelse() alone would return logical(0).
  as.character(ifelse(is_elected, "Elected", "Not elected"))
}
# Spot-check the function on the tenth induction flag in HallOfFame.
test_election_status <- HallOfFame[["inducted"]][[10]]
print(election_status(inducted = test_election_status))
## [1] "Not elected"
Create “elected” column in your HOFplayers by applying your
function
# Label each ballot row as "Elected" / "Not elected".
# vapply() replaces sapply() so the result is always one string per row
# (character(0) for an empty data frame) instead of a type that depends on
# the input.
HOF_players$elected <- vapply(
  HOF_players$inducted,
  election_status,
  FUN.VALUE = character(1)
)
head(HOF_players)
## playerID yearID votedBy ballots needed votes inducted category needed_note
## 1 cobbty01 1936 BBWAA 226 170 222 Y Player <NA>
## 2 ruthba01 1936 BBWAA 226 170 215 Y Player <NA>
## 3 wagneho01 1936 BBWAA 226 170 215 Y Player <NA>
## 4 mathech01 1936 BBWAA 226 170 205 Y Player <NA>
## 5 johnswa01 1936 BBWAA 226 170 189 Y Player <NA>
## 6 lajoina01 1936 BBWAA 226 170 146 N Player <NA>
## elected
## 1 Elected
## 2 Elected
## 3 Elected
## 4 Elected
## 5 Elected
## 6 Not elected
Add a pct = 100 * round(votes/ballots, 2) variable to the HOFplayers data
frame, calculated by player.
Hint: group_by(playerID) %>%
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Compute each ballot's share of the vote, rounded to the nearest percent.
# group_by(playerID) follows the assignment hint; ungroup() afterwards hands
# back an ungrouped tibble so later steps are not silently run per player.
HOF_players <- HOF_players %>%
  group_by(playerID) %>%
  mutate(pct = 100 * round(votes / ballots, 2)) %>%
  ungroup()
head(HOF_players)
## # A tibble: 6 × 11
## playerID yearID votedBy ballots needed votes inducted category needed_note
## <chr> <int> <chr> <dbl> <dbl> <dbl> <fct> <fct> <chr>
## 1 cobbty01 1936 BBWAA 226 170 222 Y Player <NA>
## 2 ruthba01 1936 BBWAA 226 170 215 Y Player <NA>
## 3 wagneho01 1936 BBWAA 226 170 215 Y Player <NA>
## 4 mathech01 1936 BBWAA 226 170 205 Y Player <NA>
## 5 johnswa01 1936 BBWAA 226 170 189 Y Player <NA>
## 6 lajoina01 1936 BBWAA 226 170 146 N Player <NA>
## # ℹ 2 more variables: elected <chr>, pct <dbl>
Plot histogram of pct for elected == “Elected” only
# Keep the rows labelled "Elected" and draw a base-graphics histogram of
# their vote percentages.
elected_data <- HOF_players[HOF_players[["elected"]] == "Elected", ]
hist(
  x = elected_data[["pct"]],
  col = "lightblue",
  border = "black",
  main = "Histogram of pct for Elected Players",
  xlab = "Percentage of Votes",
  ylab = "Frequency"
)

Bonus: Make your histogram nicer, use ggplot
library(ggplot2)
# ggplot2 version of the histogram: bins 2 percentage points wide,
# semi-transparent blue bars with black outlines, minimal theme.
ggplot(data = elected_data, mapping = aes(x = pct)) +
  geom_histogram(
    binwidth = 2,
    alpha = 0.7,
    color = "black",
    fill = "blue"
  ) +
  labs(
    y = "Number of Players",
    x = "Percentage of Votes",
    title = "Histogram of pct for Elected Players"
  ) +
  theme_minimal()
