# Load the 1950 World Cup team statistics (one row per team) into `cup`.
# The path is relative to the current working directory.
cup <- read.csv("worldcup50.csv")

Useful functions

# Size of the data frame as (rows, columns).
dim(cup)
## [1] 13 12
# First six rows: a quick look at the column names and the kind of values.
head(cup)
##   X year       team scored conceded penalties matches shots_on_goal won drawn
## 1 1 1950     Brazil     22        6         0       6            22   4     1
## 2 2 1950    Uruguay     15        5         0       4            15   3     1
## 3 3 1950     Sweden     11       17         1       5            11   2     1
## 4 4 1950      Spain     10       10         0       6            10   3     1
## 5 5 1950 Yugoslavia      7        3         0       3             7   2     0
## 6 6 1950      Chile      5        6         0       3             5   1     0
##   lost wc_winner
## 1    1     FALSE
## 2    0      TRUE
## 3    2     FALSE
## 4    2     FALSE
## 5    1     FALSE
## 6    2     FALSE
# Per-column summary: quartiles and mean for numeric columns, length/class
# for character columns, and TRUE/FALSE counts for logical columns.
summary(cup)
##        X           year          team               scored      
##  Min.   : 1   Min.   :1950   Length:13          Min.   : 0.000  
##  1st Qu.: 4   1st Qu.:1950   Class :character   1st Qu.: 2.000  
##  Median : 7   Median :1950   Mode  :character   Median : 4.000  
##  Mean   : 7   Mean   :1950                      Mean   : 6.769  
##  3rd Qu.:10   3rd Qu.:1950                      3rd Qu.:10.000  
##  Max.   :13   Max.   :1950                      Max.   :22.000  
##     conceded        penalties         matches      shots_on_goal   
##  Min.   : 2.000   Min.   :0.0000   Min.   :1.000   Min.   : 0.000  
##  1st Qu.: 4.000   1st Qu.:0.0000   1st Qu.:3.000   1st Qu.: 2.000  
##  Median : 6.000   Median :0.0000   Median :3.000   Median : 4.000  
##  Mean   : 6.769   Mean   :0.2308   Mean   :3.385   Mean   : 6.769  
##  3rd Qu.: 8.000   3rd Qu.:0.0000   3rd Qu.:4.000   3rd Qu.:10.000  
##  Max.   :17.000   Max.   :1.0000   Max.   :6.000   Max.   :22.000  
##       won            drawn             lost       wc_winner      
##  Min.   :0.000   Min.   :0.0000   Min.   :0.000   Mode :logical  
##  1st Qu.:1.000   1st Qu.:0.0000   1st Qu.:1.000   FALSE:12       
##  Median :1.000   Median :0.0000   Median :1.000   TRUE :1        
##  Mean   :1.462   Mean   :0.4615   Mean   :1.462                  
##  3rd Qu.:2.000   3rd Qu.:1.0000   3rd Qu.:2.000                  
##  Max.   :4.000   Max.   :1.0000   Max.   :3.000
# One-way frequency table: how many teams finished with each number of wins.
table(cup$won)
## 
## 0 1 2 3 4 
## 3 5 2 2 1
# Two-way table: wins (rows) cross-tabulated against goals scored (columns).
table(cup$won, cup$scored)
##    
##     0 2 4 5 7 10 11 15 22
##   0 1 2 0 0 0  0  0  0  0
##   1 0 1 3 1 0  0  0  0  0
##   2 0 0 0 0 1  0  1  0  0
##   3 0 0 0 0 0  1  0  1  0
##   4 0 0 0 0 0  0  0  0  1
# Median and mean of goals scored. The mean is well above the median because
# a few high-scoring teams pull it up.
median(cup$scored)
## [1] 4
mean(cup$scored)
## [1] 6.769231

Indexing vs. subsetting

# Index before the comma selects rows; leaving the column slot empty keeps
# every column. This returns row 2 as a one-row data frame.
cup[2,]
##   X year    team scored conceded penalties matches shots_on_goal won drawn lost
## 2 2 1950 Uruguay     15        5         0       4            15   3     1    0
##   wc_winner
## 2      TRUE
# Index after the comma selects columns; a single column (here `year`) comes
# back as a plain vector rather than a data frame.
cup[,2]
##  [1] 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950 1950
# The next two calls are meant to select the same row (the book uses both
# forms): first subset() with a condition, then logical indexing with [ ].
subset(cup, team == "United States")
##   X year          team scored conceded penalties matches shots_on_goal won
## 9 9 1950 United States      4        8         1       3             4   1
##   drawn lost wc_winner
## 9     0    2     FALSE
# Logical indexing of rows. The trailing comma matters: it puts the logical
# vector in the row position and keeps all columns. Without the comma the
# vector would be used to pick columns instead of rows.
cup[cup$team == "United States", ]
##   X year          team scored conceded penalties matches shots_on_goal won
## 9 9 1950 United States      4        8         1       3             4   1
##   drawn lost wc_winner
## 9     0    2     FALSE
# Why does filtering with a comparison work? The comparison returns a logical
# vector with one element per row; used as a row index, it keeps only the
# rows where it is TRUE. Here that is a single row.
cup$team == "United States"
##  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE
## [13] FALSE
# The only TRUE in the logical vector is at position 9, so asking for row 9
# directly returns the United States row.
cup[9,]
##   X year          team scored conceded penalties matches shots_on_goal won
## 9 9 1950 United States      4        8         1       3             4   1
##   drawn lost wc_winner
## 9     0    2     FALSE
# Several conditions can be combined. `&` is the element-wise AND, so this
# keeps teams that scored more than 10 goals and conceded fewer than 10.
subset(cup, scored > 10 & conceded < 10)
##   X year    team scored conceded penalties matches shots_on_goal won drawn lost
## 1 1 1950  Brazil     22        6         0       6            22   4     1    1
## 2 2 1950 Uruguay     15        5         0       4            15   3     1    0
##   wc_winner
## 1     FALSE
## 2      TRUE