Bibliotecas

library("BaylorEdPsych")
library(dplyr)
library(ggplot2)
library(scales)
library(ggalt)
library(psych)
data(MLBPitching2011) 

Visualização dos dados

# Preview the first six rows of the pitching data set.
head(MLBPitching2011, n = 6L)

Descrição dos dados

# Compact column-by-column overview: name, type and leading values.
dplyr::glimpse(MLBPitching2011)
## Rows: 642
## Columns: 45
## $ Fname <fct> Kevin, Craig, Andrew, Kevin, Cesar, Jose, Dave, Jeff, Dustin, Da…
## $ Lname <fct> Gregg, Breslow, Bailey, Slowey, Ramos, Mijares, Bush, Gray, McGo…
## $ SO    <int> 53, 44, 41, 34, 31, 30, 23, 23, 20, 18, 16, 14, 14, 14, 13, 12, …
## $ Year  <int> 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011…
## $ Age   <int> 33, 30, 27, 27, 27, 26, 31, 29, 29, 28, 22, 25, 38, 23, 24, 28, …
## $ Tm    <fct> BAL, OAK, OAK, MIN, TBR, MIN, TEX, TOT, TOR, TEX, MIN, DET, TEX,…
## $ Lg    <fct> AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, …
## $ G     <int> 63, 67, 42, 14, 59, 58, 17, 30, 5, 16, 4, 14, 8, 4, 7, 15, 8, 7,…
## $ GS    <int> 0, 0, 0, 8, 0, 0, 3, 0, 4, 0, 4, 2, 0, 4, 5, 4, 0, 3, 0, 3, 0, 0…
## $ CG    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SHO   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ GF    <int> 48, 10, 37, 1, 9, 13, 7, 11, 0, 7, 0, 4, 3, 0, 2, 8, 6, 2, 7, 0,…
## $ W     <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ L     <int> 3, 2, 4, 8, 1, 2, 1, 1, 2, 1, 2, 2, 1, 1, 3, 2, 1, 2, 1, 1, 1, 1…
## $ WLP   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ SV    <int> 22, 0, 24, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,…
## $ IP    <int> 59, 59, 41, 59, 43, 49, 37, 48, 21, 16, 23, 29, 17, 17, 24, 35, …
## $ H     <int> 58, 69, 34, 78, 36, 53, 47, 52, 20, 17, 29, 28, 15, 26, 29, 40, …
## $ R     <int> 35, 29, 18, 44, 22, 31, 27, 23, 15, 10, 16, 16, 9, 16, 22, 22, 4…
## $ ER    <int> 29, 25, 15, 44, 19, 25, 24, 23, 15, 10, 16, 14, 9, 12, 21, 22, 4…
## $ BB    <int> 40, 21, 12, 5, 25, 30, 9, 21, 13, 5, 6, 11, 10, 7, 12, 12, 7, 7,…
## $ ERA   <int> 4, 4, 3, 7, 4, 5, 6, 4, 6, 5, 6, 4, 5, 6, 8, 6, 5, 6, 7, 9, 9, 1…
## $ ERAP  <int> 94, 106, 125, 61, 95, 89, 77, 93, 68, 83, 67, 96, 98, 66, 56, 67…
## $ HR    <int> 7, 4, 3, 10, 4, 4, 6, 4, 4, 7, 3, 2, 4, 1, 5, 10, 1, 4, 6, 3, 1,…
## $ BF    <int> 275, 261, 170, 258, 192, 228, 166, 215, 96, 74, 100, 127, 74, 84…
## $ AB    <int> 228, 233, 156, 243, 161, 193, 152, 189, 81, 65, 93, 111, 61, 77,…
## $ B1    <int> 40, 50, 19, 49, 26, 34, 37, 35, 11, 9, 18, 22, 7, 18, 18, 24, 2,…
## $ B2    <int> 9, 14, 11, 17, 6, 14, 4, 12, 4, 1, 6, 4, 3, 7, 5, 6, 2, 4, 2, 1,…
## $ B3    <int> 2, 1, 1, 2, 0, 1, 0, 1, 1, 0, 2, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0…
## $ IBB   <int> 4, 1, 2, 0, 8, 2, 2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1…
## $ HBP   <int> 2, 2, 0, 3, 3, 3, 3, 1, 1, 2, 0, 1, 0, 0, 4, 1, 0, 0, 1, 1, 1, 0…
## $ SH    <int> 4, 3, 1, 3, 1, 0, 2, 1, 0, 1, 0, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ SF    <int> 1, 2, 1, 4, 2, 2, 0, 3, 1, 1, 1, 3, 1, 0, 1, 1, 0, 0, 0, 1, 5, 0…
## $ GDP   <int> 5, 5, 3, 5, 4, 4, 7, 5, 0, 0, 4, 2, 5, 1, 2, 4, 0, 1, 1, 1, 2, 1…
## $ SB    <int> 6, 3, 5, 4, 4, 2, 8, 3, 4, 4, 0, 2, 1, 1, 0, 1, 0, 1, 2, 7, 0, 3…
## $ CS    <int> 2, 7, 0, 1, 0, 3, 1, 3, 0, 0, 0, 1, 0, 1, 1, 2, 1, 1, 0, 0, 0, 0…
## $ PO    <int> 0, 5, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0…
## $ BK    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ WP    <int> 2, 3, 0, 0, 1, 1, 2, 2, 3, 0, 1, 2, 1, 0, 0, 0, 2, 2, 1, 0, 0, 0…
## $ BA    <dbl> 0.254, 0.296, 0.218, 0.321, 0.224, 0.275, 0.309, 0.275, 0.247, 0…
## $ OBP   <dbl> 0.369, 0.357, 0.272, 0.337, 0.335, 0.377, 0.360, 0.346, 0.354, 0…
## $ SLG   <dbl> 0.404, 0.416, 0.359, 0.531, 0.335, 0.420, 0.454, 0.413, 0.469, 0…
## $ OPS   <dbl> 0.773, 0.773, 0.631, 0.868, 0.670, 0.797, 0.814, 0.758, 0.823, 0…
## $ Pit   <int> 1190, 1026, 700, 914, 726, 944, 595, 755, 376, 275, 377, 484, 28…
## $ Str   <int> 698, 649, 465, 641, 419, 563, 387, 454, 215, 178, 242, 306, 171,…

Descrição Estatística das Variáveis

# Summary statistics for the two variables used in the plot below:
# strikeouts (SO) and strikes thrown (Str).
psych::describe(MLBPitching2011[["SO"]])
psych::describe(MLBPitching2011[["Str"]])

Plotagem dos subgrupos por intervalos definidos

# Split the pitchers into three subgroups. A row belongs to a subgroup only
# when BOTH its strikes thrown (Str) and its strikeouts (SO) fall inside that
# subgroup's ranges, so rows matching no pair of ranges are left out of all
# three.
#
# The mask is wrapped in which() so that a row whose Str or SO is NA is
# dropped. Indexing with the bare logical vector would instead insert an
# all-NA row for every NA in the mask, and those rows would then be handed to
# the plotting layers.

# Subgroup 1: 2 < Str < 154 and 0 < SO < 11 (lower bounds are exclusive).
subgrupo_1 <- MLBPitching2011[
  with(MLBPitching2011, which(Str > 2 & Str < 154 & SO > 0 & SO < 11)),
]

# Subgroup 2: 154 <= Str < 514 and 11 <= SO < 36.
subgrupo_2 <- MLBPitching2011[
  with(MLBPitching2011, which(Str >= 154 & Str < 514 & SO >= 11 & SO < 36)),
]

# Subgroup 3: 514 <= Str < 2591 and 36 <= SO < 250.
subgrupo_3 <- MLBPitching2011[
  with(MLBPitching2011, which(Str >= 514 & Str < 2591 & SO >= 36 & SO < 250)),
]

# Scatter plot of strikeouts (SO) against strikes thrown (Str) for every
# pitcher. Each subgroup is outlined with geom_encircle() and gets its own
# trend line from geom_smooth(), fitted on that subgroup's rows only.
ggplot(data = MLBPitching2011, mapping = aes(x = Str, y = SO)) +
  geom_point(colour = "black", size = 0.6) +
  # Subgroup 1: red outline, smoother chosen automatically.
  geom_encircle(
    mapping = aes(x = Str, y = SO),
    data = subgrupo_1,
    color = "red",
    size = 2,
    expand = 0.08
  ) +
  geom_smooth(
    mapping = aes(x = Str, y = SO),
    data = subgrupo_1,
    method = "auto",
    colour = "#8abbd0",
    size = 1.5
  ) +
  # Subgroup 2: orange outline, straight-line (lm) fit.
  geom_encircle(
    mapping = aes(x = Str, y = SO),
    data = subgrupo_2,
    color = "orange",
    size = 2,
    expand = 0.08
  ) +
  geom_smooth(
    mapping = aes(x = Str, y = SO),
    data = subgrupo_2,
    method = "lm",
    colour = "#8abbd0",
    size = 1.5
  ) +
  # Subgroup 3: blue outline, smoother chosen automatically; se = FALSE
  # turns off the confidence ribbon for this layer only.
  geom_encircle(
    mapping = aes(x = Str, y = SO),
    data = subgrupo_3,
    color = "blue",
    size = 2,
    expand = 0.08
  ) +
  geom_smooth(
    mapping = aes(x = Str, y = SO),
    data = subgrupo_3,
    method = "auto",
    colour = "#8abbd0",
    size = 1.5,
    se = FALSE
  ) +
  labs(
    title = "2011 Major League Baseball ",
    subtitle = "Pitching data",
    caption = "Fonte: BaylorEdPsych: MLBPitching2011",
    x = "Strikes thrown",
    y = "Strike outs"
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'