# Load the 1950 World Cup table (one row per team) into a data frame.
# The file name is a relative path, so it is resolved against the current
# working directory.
cup <- read.csv("worldcup50.csv")
Useful functions
# Dimensions as c(rows, columns): 13 rows and 12 columns.
dim(cup)
## [1] 13 12
# First six rows, to check the column names and typical values.
# The printout is wrapped: `lost` and `wc_winner` appear in a second chunk.
head(cup)
## X year team scored conceded penalties matches shots_on_goal won drawn
## 1 1 1950 Brazil 22 6 0 6 22 4 1
## 2 2 1950 Uruguay 15 5 0 4 15 3 1
## 3 3 1950 Sweden 11 17 1 5 11 2 1
## 4 4 1950 Spain 10 10 0 6 10 3 1
## 5 5 1950 Yugoslavia 7 3 0 3 7 2 0
## 6 6 1950 Chile 5 6 0 3 5 1 0
## lost wc_winner
## 1 1 FALSE
## 2 0 TRUE
## 3 2 FALSE
## 4 2 FALSE
## 5 1 FALSE
## 6 2 FALSE
# Per-column summary. Numeric columns get min / quartiles / mean / max,
# the character column `team` gets its length and class, and the logical
# column `wc_winner` gets counts of FALSE and TRUE.
summary(cup)
## X year team scored
## Min. : 1 Min. :1950 Length:13 Min. : 0.000
## 1st Qu.: 4 1st Qu.:1950 Class :character 1st Qu.: 2.000
## Median : 7 Median :1950 Mode :character Median : 4.000
## Mean : 7 Mean :1950 Mean : 6.769
## 3rd Qu.:10 3rd Qu.:1950 3rd Qu.:10.000
## Max. :13 Max. :1950 Max. :22.000
## conceded penalties matches shots_on_goal
## Min. : 2.000 Min. :0.0000 Min. :1.000 Min. : 0.000
## 1st Qu.: 4.000 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.: 2.000
## Median : 6.000 Median :0.0000 Median :3.000 Median : 4.000
## Mean : 6.769 Mean :0.2308 Mean :3.385 Mean : 6.769
## 3rd Qu.: 8.000 3rd Qu.:0.0000 3rd Qu.:4.000 3rd Qu.:10.000
## Max. :17.000 Max. :1.0000 Max. :6.000 Max. :22.000
## won drawn lost wc_winner
## Min. :0.000 Min. :0.0000 Min. :0.000 Mode :logical
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:1.000 FALSE:12
## Median :1.000 Median :0.0000 Median :1.000 TRUE :1
## Mean :1.462 Mean :0.4615 Mean :1.462
## 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :4.000 Max. :1.0000 Max. :3.000
# One-way frequency table: number of teams for each count of matches won.
table(cup$won)
##
## 0 1 2 3 4
## 3 5 2 2 1
# Two-way table: rows are matches won, columns are goals scored.
table(cup$won, cup$scored)
##
## 0 2 4 5 7 10 11 15 22
## 0 1 2 0 0 0 0 0 0 0
## 1 0 1 3 1 0 0 0 0 0
## 2 0 0 0 0 1 0 1 0 0
## 3 0 0 0 0 0 1 0 1 0
## 4 0 0 0 0 0 0 0 0 1
# Centre of the goals-scored distribution. The mean is above the median
# because a few high-scoring teams (up to 22 goals) pull it upward.
median(cup$scored)
## [1] 4
mean(cup$scored)
## [1] 6.769231
Indexing vs. subsetting
# Row 2, every column: leaving the slot after the comma empty keeps all
# columns, so the result is a one-row data frame.
cup[2, ]
## X year team scored conceded penalties matches shots_on_goal won drawn lost
## 2 2 1950 Uruguay 15 5 0 4 15 3 1 0
## wc_winner
## 2 TRUE
# Column 2 (`year`), every row: a single column comes back as a plain vector.
cup[, 2]
## [1] 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950
# Two equivalent ways to pull out the row(s) for one team; the book uses both.
# subset() evaluates the condition using the data frame's own columns.
subset(cup, team == "United States")
## X year team scored conceded penalties matches shots_on_goal won
## 9 9 1950 United States 4 8 1 3 4 1
## drawn lost wc_winner
## 9 0 2 FALSE
# With bracket indexing the trailing comma is required: it puts the logical
# vector in the row position. Without the comma, `[` treats the vector as a
# column selector and returns column 9 (`won`) instead of the matching row.
cup[cup$team == "United States", ]
## X year team scored conceded penalties matches shots_on_goal won
## 9 9 1950 United States 4 8 1 3 4 1
## drawn lost wc_winner
## 9 0 2 FALSE
# Why can a comparison be used as an index?
# The comparison returns one logical value per row, and it is TRUE only
# at position 9.
cup$team == "United States"
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE
## [13] FALSE
# Using that vector as a row index is therefore the same as asking for row 9.
cup[9, ]
## X year team scored conceded penalties matches shots_on_goal won
## 9 9 1950 United States 4 8 1 3 4 1
## drawn lost wc_winner
## 9 0 2 FALSE
# Several conditions can be combined in one call: `&` is the element-wise
# AND, so this keeps rows with more than 10 goals scored and fewer than 10
# goals conceded.
subset(cup, scored > 10 & conceded < 10)
## X year team scored conceded penalties matches shots_on_goal won drawn lost
## 1 1 1950 Brazil 22 6 0 6 22 4 1 1
## 2 2 1950 Uruguay 15 5 0 4 15 3 1 0
## wc_winner
## 1 FALSE
## 2 TRUE