This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.


library (devtools)

library (dplyr)

library (ggplot2)

library (shiny)

library(statsr)

library(GGally)

bons sites -> https://sosestatistica.com.br/como-fazer-e-analisar-um-histograma-no-r/

bons sites2 -> https://www.curso-r.com/material/ggplot/

Criar o vetor com o saldo de gols do São Paulo no Campeonato Brasileiro desde 2003

# Goal difference ("saldo de gols") of São Paulo in the Brazilian championship
# since 2003, stored as a plain numeric vector.
v <- c(14, 35, 10, 34, 36, 30, 15, 0, 11, 22, -1, 19, 6, 8, -1, 12, 9)

Aprofundando a análise

# Return the statistical mode (most frequent value) of a vector.
#
# v: an atomic vector.
# Returns a length-one vector holding the value that occurs most often in `v`.
# When several values share the highest count, the one that appears first in
# `v` is returned (unique() keeps first-appearance order and which.max() picks
# the first maximum).
getmode <- function(v) {
  distinct_values <- unique(v)
  # Map every element to the position of its distinct value, then count hits.
  positions <- match(v, distinct_values)
  counts <- tabulate(positions)
  distinct_values[which.max(counts)]
}

a moda do saldo de gols do são paulo desde 2003

# Compute the mode of v with the user-defined getmode() and print it.
result <- getmode(v)
print(result)
[1] -1

resumo até aqui

summary(v)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  -1.00    8.00   12.00   15.24   22.00   36.00 
hist(v)

AGORA: transformar em banco de dados (mais para frente, incluir o ano e os gols marcados, e tirar da análise os dados de 2003, 2004 e 2005, quando o campeonato tinha mais de 20 times)

# Wrap the vector in a one-column data.frame (column y).
dados2 <- data.frame(y = v)
# Build a histogram of y with custom axis titles, then display it.
h1 <- ggplot(data = dados2, mapping = aes(x = y)) +
  geom_histogram() +
  xlab("Dados") +
  ylab("Frequencia")
h1

dim(dados2)
[1] 17  1

transformar em variável categórica com categorias de gols

# Turn the numeric goal difference in y into an ordered categorical variable.
# The last break is Inf so that '>40' really is an open-ended top bin. The
# previous upper break, max(y), was smaller than 40 for this data, which put a
# break inside the 30-40 range instead of above it.
dados2$y <- cut(
  x = dados2$y,
  breaks = c(-2, 10, 20, 30, 40, Inf),
  labels = c('(-2~10]', '(10-20)', '(20-30]', '(30~40)', '>40'),
  ordered_result = TRUE,
  right = TRUE
)
dim(dados2)
[1] 17  2
# Re-create the numeric goal differences as column w.
dados2[["w"]] <- c(14, 35, 10, 34, 36, 30, 15, 0, 11, 22, -1, 19, 6, 8, -1, 12, 9)
summary(dados2)
# Add the year and the goals scored per year to dados2.
# NOTE(review): the original note described y as "the years with a positive
# goal difference"; confirm, since y is produced by cut() elsewhere in the file.
# g: goals scored per championship.
# h: starts as a copy of g and is meant to be turned into a categorical variable.
dados2[["ano"]] <- c(2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011,
                     2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019)
dados2[["g"]] <- c(NA, NA, NA, 66, 55, 66, 57, 54, 57, 59, 39, 59, 53, 44, 48, 46, 39)
dados2[["h"]] <- dados2[["g"]]
dim(dados2)
[1] 17  6
# Turn the goals scored in h into an ordered categorical variable.
# Breaks are spelled out explicitly: 0 as the lower bound (matching the
# '(0~45]' label) and Inf as the upper bound (matching '>59'). The previous
# code appended max(y) to the breaks, but y is the goal-difference factor, not
# a goal count, so the lowest break was accidental and any value above 67
# would have become NA.
dados2$h <- cut(
  x = dados2$h,
  breaks = c(0, 45, 49, 59, Inf),
  labels = c('(0~45]', '(45-49)', '(49-59]', '>59'),
  ordered_result = TRUE,
  right = TRUE
)
summary(dados2$h)
 (0~45] (45-49) (49-59]     >59    NA's 
      3       2       7       2       3 
# Line plot of the goal difference (w) against the year (ano).
# No group_by() is needed: there is no summarising step, and ggplot() draws
# the same line from the ungrouped data.
dados2 %>%
  ggplot() +
  geom_line(aes(x = ano, y = w))

# Line plot of the goals scored (g) against the year (ano).
# na.rm = TRUE belongs to the geom layer, where it silently drops the rows
# with missing g; ggplot() itself ignores that argument. The group_by() step
# was removed because nothing downstream uses the grouping.
dados2 %>%
  ggplot() +
  geom_line(aes(x = ano, y = g), na.rm = TRUE)

# Remove the 2003, 2004 and 2005 seasons from the sample (more than 20 teams
# played then). Those rows are the ones with missing g / h.
# Missing values are tested with is.na(); comparing against the string "NA"
# only worked because the comparison itself evaluated to NA for those rows.
dados2 <- droplevels(subset(dados2, !is.na(g) & !is.na(h)))
# Scatter plot of goal difference (w) against goals scored (g), with the
# identity line y = x drawn in red for reference.
ggplot(data = dados2) +
  geom_point(aes(x = w, y = g)) +
  geom_abline(slope = 1, intercept = 0, color = "red")

Nota: não parece haver muita relação

# Column-wise proportions of the y-by-h contingency table (the two
# categorical variables): each h column sums to 1.
prop.table(
  table(dados2$y, dados2$h),
  margin = 2
)
         
             (0~45]   (45-49)   (49-59]       >59
  (-2~10] 1.0000000 0.5000000 0.2857143 0.0000000
  (10-20) 0.0000000 0.5000000 0.4285714 0.0000000
  (20-30] 0.0000000 0.0000000 0.1428571 0.5000000
  (30~40) 0.0000000 0.0000000 0.1428571 0.5000000
# Goal difference (w) against goals scored, this time as one boxplot per
# goals-scored category (h).
ggplot(data = dados2) +
  geom_boxplot(mapping = aes(x = h, y = w))


# Mean goal difference (w) within each goals-scored category (h).
summarise(
  group_by(dados2, h),
  mean_SG = mean(w)
)
`summarise()` ungrouping output (override with `.groups` argument)
# Median goal difference (w) within each goals-scored category (h).
summarise(
  group_by(dados2, h),
  median_SG = median(w)
)
`summarise()` ungrouping output (override with `.groups` argument)

Como deu certo

# Summary of the numeric goal difference (w) within each goal-difference
# category (y). dados2 has no column named ya in this file; the numeric goal
# difference is stored in w.
by(dados2$w, dados2$y, summary)
dados2$y: (-2~10]
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  -1.00   -0.75    3.00    3.50    7.50    9.00 
----------------------------------------------- 
dados2$y: (10-20)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  11.00   11.75   13.50   14.25   16.00   19.00 
----------------------------------------------- 
dados2$y: (20-30]
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
     22      24      26      26      28      30 
----------------------------------------------- 
dados2$y: (30~40)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   34.0    34.5    35.0    35.0    35.5    36.0 

acima como era esperado, bate certinho

Por fim, uma última análise dos gols pró explicando o saldo de gols

# Summary of the numeric goal difference (w) within each goals-scored
# category (h). dados2 has no column named ya in this file; the numeric goal
# difference is stored in w.
by(dados2$w, dados2$h, summary)
dados2$h: (0~45]
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -1.000   3.500   8.000   5.333   8.500   9.000 
----------------------------------------------- 
dados2$h: (45-49)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  -1.00    2.25    5.50    5.50    8.75   12.00 
----------------------------------------------- 
dados2$h: (49-59]
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    8.50   15.00   15.57   20.50   36.00 
----------------------------------------------- 
dados2$h: >59
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
     30      31      32      32      33      34 

Se o time fizer mais de 59 gols, o saldo de gols sobe muito

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiANCg0KVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiANCg0KYGBge3J9DQoNCmxpYnJhcnkgKGRldnRvb2xzKQ0KDQpsaWJyYXJ5IChkcGx5cikNCg0KbGlicmFyeSAoZ2dwbG90MikNCg0KbGlicmFyeSAoc2hpbnkpDQoNCmxpYnJhcnkoc3RhdHNyKQ0KDQpsaWJyYXJ5KEdHYWxseSkNCg0KYGBgDQoNCmJvbnMgc2l0ZXMgLT4gaHR0cHM6Ly9zb3Nlc3RhdGlzdGljYS5jb20uYnIvY29tby1mYXplci1lLWFuYWxpc2FyLXVtLWhpc3RvZ3JhbWEtbm8tci8NCg0KYm9ucyBzaXRlczIgLT4gaHR0cHM6Ly93d3cuY3Vyc28tci5jb20vbWF0ZXJpYWwvZ2dwbG90Lw0KDQoNCiMgQ3JlYXRlIHRoZSB2ZWN0b3Igd2l0aCBzYWxkbyBkZSBnb2xzIGRvIHPDo28gcGF1bG8gbm8gY2FtcCBicmFzaWxlaXJvIGRlc2RlIDIwMDMNCg0KDQpgYGB7cn0NCiMgQ3JlYXRlIHRoZSB2ZWN0b3Igd2l0aCBzYWxkbyBkZSBnb2xzIGRvIHPDo28gcGF1bG8gbm8gY2FtcCBicmFzaWxlaXJvIGRlc2RlIDIwMDMNCnYgPC0gYygxNCwgMzUsIDEwLCAzNCwgMzYsIDMwLCAxNSwgMCwgMTEsIDIyLCAtMSwgMTksIDYsIDgsIC0xLCAxMiwgOSkNCmBgYA0KDQojIGFwb2Z1bmRhbmRvIGEgYW7DoWxpc2UNCg0KYGBge3J9DQojIENyZWF0ZSB0aGUgZnVuY3Rpb24uDQpnZXRtb2RlIDwtIGZ1bmN0aW9uKHYpIHsNCiAgIHVuaXF2IDwtIHVuaXF1ZSh2KQ0KICAgdW5pcXZbd2hpY2gubWF4KHRhYnVsYXRlKG1hdGNoKHYsIHVuaXF2KSkpXQ0KfQ0KYGBgDQoNCiMjIGEgbW9kYSBkbyBzYWxkbyBkZSBnb2xzIGRvIHPDo28gcGF1bG8gZGVzZGUgMjAwMw0KYGBge3J9DQojIENhbGN1bGF0ZSB0aGUgbW9kZSB1c2luZyB0aGUgdXNlciBmdW5jdGlvbi4NCnJlc3VsdCA8LSBnZXRtb2RlKHYpDQpwcmludChyZXN1bHQpDQpgYGANCg0KcmVzdW1vIGF0w6kgYXF1aQ0KYGBge3J9DQpzdW1tYXJ5KHYpDQpoaXN0KHYpDQpgYGANCg0KIyBBR09SQTogdHJhbnNmb3JtYXIgZW0gYmFuY29zIGRlIGRhZG9zIChtYWlzIHBhcmEgZnJlbnRlIGluY2x1aXIgbyBhbm8sIG9zIGdvbHMgbWFyY2Fkb3MgZSB0aXJhciBkYSBhbsOhbGlzZSBkYWRvcyBkZSAyMDAzLDIwMDQgZSAyMDA1IHF1ZSB0aW5oYSBtYWlzIGRlIDIwIHRpbWVzKQ0KDQpgYGB7cn0NCiMgVHJhbnNmb3JtYXIgdmV0b3IgcGFyYSBkYXRhLmZyYW1lDQpkYWRvczIgPC0gZGF0YS5mcmFtZSh5
ID0gdikNCiMgQ3JpYXIgaGlzdG9ncmFtYQ0KaDEgPC0gZ2dwbG90KGRhZG9zMiwgYWVzKHkpKSArIGdlb21faGlzdG9ncmFtKCkgKw0KICB4bGFiKCJEYWRvcyIpICsgeWxhYigiRnJlcXVlbmNpYSIpDQpoMQ0KYGBgDQoNCmBgYHtyfQ0KZGltKGRhZG9zMikNCmBgYA0KDQp0cmFuc2Zvcm1hciBlbSB2YXJpw6F2ZWwgY2F0ZWfDs3JpY2EgY29tIGNhdGVnb3JpYXMgZGUgZ29scw0KDQpgYGB7cn0NCiN0cmFuc2Zvcm1hciBlbSB2YXJpw6F2ZWwgY2F0ZWfDs3JpY2EgY29tIGNhdGVnb3JpYXMgZGUgZ29scw0KZGFkb3MyJHkgPC0gd2l0aChkYWRvczIsIGN1dCh4ID0geSwgYnJlYWtzID0gYyAoLTIsMTAsMjAsMzAsNDAsIG1heCh5LCBuYS5ybSA9IFQpKSwgbGFiZWxzPWMoJygtMn4xMF0nLCcoMTAtMjApJywnKDIwLTMwXScsJygzMH40MCknLCc+NDAnKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG9yZGVyZWRfcmVzdWx0ID0gVFJVRSwgcmlnaHQgPSBUUlVFKSkNCmRpbShkYWRvczIpDQpgYGANCg0KYGBge3J9DQojcmVjcmlhciBvcyBkYWRvcyBudW3DqXJpY29zIGNvbW8gdw0KZGFkb3MyJHcgPC0gYygxNCwgMzUsIDEwLCAzNCwgMzYsIDMwLCAxNSwgMCwgMTEsIDIyLCAtMSwgMTksIDYsIDgsIC0xLCAxMiwgOSkNCnN1bW1hcnkoZGFkb3MyKQ0KYGBgDQoNCg0KYGBge3J9DQojYWNyZXNjZW50YXIgbm92byBkYWRvIMOgICdkYWRvczInIH5nb2xzIG1hcmNhZG9zIHBvciBhbm8gZSBhbm8NCiN5IHPDo28gb3MgYW5vcyBlbSBxdWUgaMOhIHNhbGRvIGRlIGdvbHMgcG9zaXRpdm9zDQojZyBnb2xzIHBvciBjYW1wZW9uYXRvDQojaCDDqSBpZ3VhbCBhIGcgZSBzZXLDoSB0cmFuc2Zvcm1hZG8gZW0gY2F0ZWdvcmlhbA0KZGFkb3MyJGFubyA8LSBjKDIwMDMsMjAwNCwyMDA1LDIwMDYsMjAwNywyMDA4LDIwMDksMjAxMCwyMDExLDIwMTIsMjAxMywyMDE0LDIwMTUsMjAxNiwyMDE3LDIwMTgsMjAxOSkNCmRhZG9zMiRnIDwtIGMoTkEsTkEsTkEsNjYsNTUsNjYsNTcsNTQsNTcsNTksMzksNTksNTMsNDQsNDgsNDYsMzkpDQpkYWRvczIkaCA8LSBjKE5BLE5BLE5BLDY2LDU1LDY2LDU3LDU0LDU3LDU5LDM5LDU5LDUzLDQ0LDQ4LDQ2LDM5KQ0KZGltKGRhZG9zMikgDQoNCmBgYA0KDQpgYGB7cn0NCiMjdHJhbnNmb3JtYXIgZW0gdmFyacOhdmVsIGNhdGVnw7NyaWNhIGNvbSBjYXRlZ29yaWFzIGRlIGdvbHMgbWFyY2Fkb3MNCmRhZG9zMiRoIDwtIHdpdGgoZGFkb3MyLCBjdXQoeCA9IGgsIGJyZWFrcyA9IGMgKDQ1LDQ5LDU5LDY3LCBtYXgoeSwgbmEucm0gPSBUKSksIGxhYmVscz1jKCcoMH40NV0nLCcoNDUtNDkpJywnKDQ5LTU5XScsJz41OScpLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgb3JkZXJlZF9yZXN1bHQgPSBUUlVFLCByaWdodCA9IFRSVUUpKQ0Kc3VtbWFyeShkYWRvczIkaCkNCmBgYA0KDQpgYGB7cn0NCiNjcnV6YW5kbyBhbm8gY29tIG9zIGRhZG9zIFNhbGRvIGRlIGdvbHMg
cG9yIGFubw0KZGFkb3MyICU+JSANCiAgZ3JvdXBfYnkoYW5vKSAlPiUgDQogIGdncGxvdCgpICsNCiAgZ2VvbV9saW5lKGFlcyh4ID0gYW5vLCB5ID0gdykpDQpgYGANCg0KYGBge3J9DQojY3J1emFuZG8gYW5vIGNvbSBvcyBkYWRvcyBnb2xzIHByw7MgcG9yIGFubw0KZGFkb3MyICU+JSANCiAgZ3JvdXBfYnkoYW5vKSAlPiUgDQogIGdncGxvdChuYS5ybSA9IFRSVUUpICsNCiAgZ2VvbV9saW5lKGFlcyh4ID0gYW5vLCB5ID0gZykpDQpgYGANCg0KYGBge3J9DQojdGlyYXIgb3MgYW5vcyBkZSAyMDAzLCAyMDA0IGUgMjAwNSBkYSBhbW9zdHJhIChlcmFtIG1haXMgZGUgMjAgdGltZXMpDQpkYWRvczI8LWRyb3BsZXZlbHMoc3Vic2V0KGRhZG9zMixnIT0iTkEiKSkNCmRhZG9zMjwtZHJvcGxldmVscyhzdWJzZXQoZGFkb3MyLGghPSJOQSIpKQ0KYGBgDQoNCmBgYHtyfQ0KIyBjcnV6YW1lbnRvIHNhbGRvIGRlIGdvbHMgZSBnb2xzIHByw7MNCmdncGxvdChkYWRvczIpICsNCiAgZ2VvbV9wb2ludChtYXBwaW5nID0gYWVzKHggPSB3LCB5ID0gZykpICsNCiAgZ2VvbV9hYmxpbmUoaW50ZXJjZXB0ID0gMCwgc2xvcGUgPSAxLCBjb2xvciA9ICJyZWQiKQ0KYGBgDQoNCm5vdGE6IG5hbyBwYXJlY2UgaGF2ZXIgbXVpdGEgcmVsYcOnw6NvDQoNCmBgYHtyfQ0KIyNwcm9wdGFibGUgZGUgeSBlIGggKGFzIGNhdGVnw7NyaWNhcykNCiBwcm9wLnRhYmxlKHRhYmxlKGRhZG9zMiR5LGRhZG9zMiRoKSwyKQ0KYGBgDQoNCmBgYHtyfQ0KIyBjcnV6YW1lbnRvIHNhbGRvIGRlIGdvbHMgZSBnb2xzIHByw7MgY29tICBtYWlzIGZyZXNjdXJhcyANCmRhZG9zMiAgJT4lIA0KZ2dwbG90KCkgKyANCiAgZ2VvbV9ib3hwbG90KGFlcyh4ID0gaCwgeSA9IHcpKQ0KDQojIGNvbSBhIG3DqWRpYSAoc2FsZG8gZGUgZ29scyBwb3IgZmFpeGEgZGUgZ29scyBwcsOzKQ0KZGFkb3MyICU+JQ0KICBncm91cF9ieShoKSAlPiUNCiAgc3VtbWFyaXNlKG1lYW5fU0cgPSBtZWFuKHcpKQ0KDQojY29tIGEgbWVkaWFuYSAoc2FsZG8gZGUgZ29scyBwb3IgZmFpeGEgZGUgZ29scyBwcsOzKQ0KZGFkb3MyICU+JQ0KICBncm91cF9ieShoKSAlPiUNCiAgc3VtbWFyaXNlKG1lZGlhbl9TRyA9IG1lZGlhbih3KSkNCmBgYA0KDQpDb21vIGRldSBjZXJ0byANCg0KYGBge3J9DQpieShkYWRvczIkeWEsIGRhZG9zMiR5LCBzdW1tYXJ5KQ0KYGBgDQoNCmFjaW1hIGNvbW8gZXJhIGVzcGVyYWRvLCBiYXRlIGNlcnRpbmhvDQoNCnBvciBmaW0sIHVtYSB1bHRpbWEgYW5hbGlzZSBkb3MgZ29scyBwcsOzIGV4cGxpY2FuZG8gbyBzYWxkbyBkZSBnb2xzDQpgYGB7cn0NCmJ5KGRhZG9zMiR5YSwgZGFkb3MyJGgsIHN1bW1hcnkpDQpgYGANCg0KU2UgbyB0aW1lIGZpemVyIG1haXMgZGUgNTkgZ29scyBzb2JlIHBhcmEgY2FyYWxobyBvIHNhbGRvDQo=