## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Loading required package: scales

Question for Analysis:

  1. Compare year-over-year win totals for each team.

Import Data

# Download the wide-format win table: one row per season, one column per team.
data_set <- read.csv(
  file = "https://raw.githubusercontent.com/jasonjgy2000/IS607/master/Projects/Project%202/MLBTeamWins.csv"
)

# Preview the first rows as a tbl so wide output is truncated when printed.
data_set %>%
  tbl_df() %>%
  head()
## Source: local data frame [2 x 33]
## 
##    Year     G   ARI   ATL   BLA   BAL   BOS   CHC   CHW   CIN   CLE   COL
##   (int) (int) (int) (int) (lgl) (int) (int) (int) (int) (int) (int) (int)
## 1  2015   156    75    62    NA    76    75    90    73    63    77    66
## 2  2014   162    64    79    NA    96    71    73    73    76    85    66
## Variables not shown: DET (int), HOU (int), KCR (int), ANA (int), LAD
##   (int), FLA (int), MIL (int), MIN (int), NYM (int), NYY (int), OAK (int),
##   PHI (int), PIT (int), SDP (int), SFG (int), SEA (int), STL (int), TBD
##   (int), TEX (int), TOR (int), WSN (int)

Data Transformation

Transform the data frame from wide format (one column per team) to long format (one row per year and team).

# Reshape from wide to long: one row per (Year, team), with that season's wins
# in `score`. Column 2 (`G`, games played) is a season-level attribute rather
# than a team, so it is excluded from the gather and kept next to `Year` as an
# identifier column. Gathering it would create a bogus team "G".
data_set <- gather(data_set, "team", "score", -Year, -G)
head(data_set)
##   Year   G team score
## 1 2015 156  ARI    75
## 2 2014 162  ARI    64
## 3 2015 156  ATL    62
## 4 2014 162  ATL    79
## 5 2015 156  BLA    NA
## 6 2014 162  BLA    NA

Analysis

Comparing year-over-year win totals by team

# Total wins per season and team. Two kinds of rows are dropped first because
# they are not real team results:
#   * team == "G": the games-played column, which appears as a pseudo-team
#     whenever the reshape step gathers every non-Year column;
#   * missing scores: a team with no data for a season would otherwise be
#     summed (with na.rm = TRUE) to a misleading total of 0.
i <- data_set %>%
  filter(team != "G", !is.na(score)) %>%
  group_by(Year, team) %>%
  summarise(total = sum(score, na.rm = TRUE)) %>%
  ungroup() # summarise() only peels off the last grouping level (team)

# Dodged bars, one per team within each season. Year is mapped as a factor so
# each season is a discrete axis position; a numeric Year is treated as
# continuous and may be given fractional tick marks.
ggplot(data = i, aes(x = factor(Year), y = total, fill = team)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Year")