##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Loading required package: scales
# Download the MLB team-wins table straight from the project repository.
data_set <- read.csv(
  file = "https://raw.githubusercontent.com/jasonjgy2000/IS607/master/Projects/Project%202/MLBTeamWins.csv"
)

# Preview the first rows as a dplyr table so wide output is truncated
# instead of wrapping across the console.
data_set %>%
  tbl_df() %>%
  head()
## Source: local data frame [2 x 33]
##
## Year G ARI ATL BLA BAL BOS CHC CHW CIN CLE COL
## (int) (int) (int) (int) (lgl) (int) (int) (int) (int) (int) (int) (int)
## 1 2015 156 75 62 NA 76 75 90 73 63 77 66
## 2 2014 162 64 79 NA 96 71 73 73 76 85 66
## Variables not shown: DET (int), HOU (int), KCR (int), ANA (int), LAD
## (int), FLA (int), MIL (int), MIN (int), NYM (int), NYY (int), OAK (int),
## PHI (int), PIT (int), SDP (int), SFG (int), SEA (int), STL (int), TBD
## (int), TEX (int), TOR (int), WSN (int)
Transforming the data frame from wide format (one column per team) to long format (one row per year and team).
# Reshape to long form: one row per season and team, with that team's value
# stored in `score`.
# `Year` and `G` are excluded from the gather because they describe the season
# rather than a team. `G` (presumably games played -- confirm against the data
# source) used to be stacked in as if it were a team, which added a bogus "G"
# row to every season and distorted any per-team comparison. Selecting by name
# instead of by position (2:33) also keeps this correct if columns are added.
data_set <- gather(data_set, "team", "score", -Year, -G)
head(data_set)
## Year G team score
## 1 2015 156 ARI 75
## 2 2014 162 ARI 64
## 3 2015 156 ATL 62
## 4 2014 162 ATL 79
## 5 2015 156 BLA NA
## 6 2014 162 BLA NA
Comparing year-over-year win totals for each team.
# Total `score` per season and team. `na.rm = TRUE` means a team whose values
# are all NA for a season gets a total of 0 rather than NA.
# `ungroup()` drops the grouping that summarise() would otherwise leave on the
# result, so later operations on `i` are not silently applied per group.
i <- data_set %>%
  group_by(Year, team) %>%
  summarise(total = sum(score, na.rm = TRUE)) %>%
  ungroup()

# Side-by-side bars, one per team, clustered by season.
# Year is mapped as a factor so the x axis shows one discrete position per
# season instead of a continuous scale with fractional-year breaks; the axis
# title is set explicitly so it does not read "factor(Year)".
ggplot(data = i, aes(x = factor(Year), y = total, fill = team)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(x = "Year")