Bibliotecas

library("BaylorEdPsych")
library(dplyr)
library(ggplot2)
library(ggrepel)
library('psych')
# Load the MLBPitching2011 dataset into the workspace by name.
data(list = "MLBPitching2011")

Visualização dos dados

# Preview the first six rows of the dataset.
head(MLBPitching2011, n = 6L)

Descrição Estatística das Variáveis

# Descriptive statistics for the two variables used in the plot:
# SO and Str.
psych::describe(MLBPitching2011[["SO"]])
psych::describe(MLBPitching2011[["Str"]])

Plotagem dos subgrupos por intervalos definidos

# Scatter plot of SO against Str, one point per row of MLBPitching2011,
# overlaid with a dashed linear-fit line.
#   * Point colour: dark teal when Str >= 842, light teal otherwise.
#   * Point alpha: opaque when Str >= 842 and SO < 74, otherwise 0.6.
#   * Only rows with 1200 < Str < 1450 are labelled with their Tm value.
ggplot(MLBPitching2011, aes(x = Str, y = SO)) +
  geom_point(
    size = 6,
    # Colour and alpha are set outside aes(), so the vectors are computed
    # directly from the data frame and used as literal per-point values.
    colour = ifelse(MLBPitching2011$Str >= 842, '#014d64', '#76c0c1'),
    alpha = ifelse(
      MLBPitching2011$Str >= 842 & MLBPitching2011$SO < 74, 1, .6
    )
  ) +
  geom_text_repel(
    aes(label = Tm),
    colour = 'red',
    #alpha=c(.4,1,rep(.4,5)),
    box.padding = .2,
    point.padding = .3,
    # With the default overlap limit ggrepel dropped 6 labels from the
    # selected subset; lifting the limit keeps every selected row labelled.
    max.overlaps = Inf,
    data = subset(MLBPitching2011, Str > 1200 & Str < 1450)
  ) +
  xlab('Strikes thrown') +
  ylab('Strike out') +
  labs(
    title = '2011 Major League Baseball',
    caption = 'Pitching data'
  ) +
  theme(plot.title = element_text(size = 15)) +
  # State the model formula explicitly instead of relying on the default
  # (which geom_smooth() reports with a message on every render).
  geom_smooth(
    method = 'lm',
    formula = y ~ x,
    colour = 'black',
    linetype = 'dashed',
    se = FALSE
  )