options(warn=-1)
library(RSQLite)
library(DBI)
library(plyr)
library(ggplot2)
library(reshape2)
con = dbConnect(RSQLite::SQLite(),'lahman2016.sqlite')
dbListTables(con)
## [1] "AllstarFull" "Appearances" "AwardsManagers"
## [4] "AwardsPlayers" "AwardsShareManagers" "AwardsSharePlayers"
## [7] "Batting" "BattingPost" "CollegePlaying"
## [10] "Fielding" "FieldingOF" "FieldingOFsplit"
## [13] "FieldingPost" "HallOfFame" "HomeGames"
## [16] "Managers" "ManagersHalf" "Master"
## [19] "Parks" "Pitching" "PitchingPost"
## [22] "Salaries" "Schools" "SeriesPost"
## [25] "Teams" "TeamsFranchises" "TeamsHalf"
dbListFields(con, "Master")
## [1] "playerID" "birthYear" "birthMonth" "birthDay"
## [5] "birthCountry" "birthState" "birthCity" "deathYear"
## [9] "deathMonth" "deathDay" "deathCountry" "deathState"
## [13] "deathCity" "nameFirst" "nameLast" "nameGiven"
## [17] "weight" "height" "bats" "throws"
## [21] "debut" "finalGame" "retroID" "bbrefID"
pitchers<- dbGetQuery(con, "SELECT Master.nameLast, Master.nameGiven, sum(Pitching.SO) AS 'Strikeouts', sum(Pitching.W) As 'Wins'
FROM Master
INNER JOIN Pitching ON Pitching.playerID = Master.playerID
GROUP BY Master.nameLast, Master.nameGiven
HAVING sum(Pitching.SO) >= 3000
Order BY sum(Pitching.SO) desc, sum(Pitching.W) desc ")
pitchers
## nameLast nameGiven Strikeouts Wins
## 1 Ryan Lynn Nolan 5714 324
## 2 Johnson Randall David 4875 303
## 3 Clemens William Roger 4672 354
## 4 Carlton Steven Norman 4136 329
## 5 Blyleven Rik Aalbert 3701 287
## 6 Seaver George Thomas 3640 311
## 7 Sutton Donald Howard 3574 324
## 8 Perry Gaylord Jackson 3534 314
## 9 Johnson Walter Perry 3509 417
## 10 Maddux Gregory Alan 3371 355
## 11 Niekro Philip Henry 3342 318
## 12 Jenkins Ferguson Arthur 3192 284
## 13 Martinez Pedro Jaime 3154 219
## 14 Gibson Robert 3117 251
## 15 Schilling Curtis Montague 3116 216
## 16 Smoltz John Andrew 3084 213
top_3_pitchers<- dbGetQuery(con, "SELECT Master.nameLast || ', ' || Master.nameGiven AS 'Name', sum(Pitching.SO) AS 'Strikeouts', sum(Pitching.W) As 'Wins'
FROM Master
INNER JOIN Pitching ON Pitching.playerID = Master.playerID
GROUP BY Master.nameLast, Master.nameGiven
HAVING sum(Pitching.SO) >= 3000
Order BY sum(Pitching.SO) desc, sum(Pitching.W) desc
Limit 3")
top_3_pitchers
## Name Strikeouts Wins
## 1 Ryan, Lynn Nolan 5714 324
## 2 Johnson, Randall David 4875 303
## 3 Clemens, William Roger 4672 354
data.m <- melt(top_3_pitchers, id.vars='Name')
# plot everything
ggplot(data.m, aes(Name, value)) +
geom_bar(aes(fill = variable), position = "dodge", stat="identity")+
geom_text(aes(label=value), vjust=1.6, hjust=-.25, color="blue",
position = position_dodge(1.0), size=3.5)