Part II

Illya Mowerman

2/5/2018

Welcome

What we’ll cover

Data Wrangling with tidyverse

Please load some pakages

library(tidyverse)

library(AER)
data('Affairs')

affairs <- Affairs
summary(affairs)
##     affairs          gender         age         yearsmarried    children 
##  Min.   : 0.000   female:315   Min.   :17.50   Min.   : 0.125   no :171  
##  1st Qu.: 0.000   male  :286   1st Qu.:27.00   1st Qu.: 4.000   yes:430  
##  Median : 0.000                Median :32.00   Median : 7.000            
##  Mean   : 1.456                Mean   :32.49   Mean   : 8.178            
##  3rd Qu.: 0.000                3rd Qu.:37.00   3rd Qu.:15.000            
##  Max.   :12.000                Max.   :57.00   Max.   :15.000            
##  religiousness     education       occupation        rating     
##  Min.   :1.000   Min.   : 9.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:14.00   1st Qu.:3.000   1st Qu.:3.000  
##  Median :3.000   Median :16.00   Median :5.000   Median :4.000  
##  Mean   :3.116   Mean   :16.17   Mean   :4.195   Mean   :3.932  
##  3rd Qu.:4.000   3rd Qu.:18.00   3rd Qu.:6.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :20.00   Max.   :7.000   Max.   :5.000

Basic wrangling verbs for today

arrange
filter
mutate
summarise
group_by
select
rename
inner_join
left_join
semi_join
anti_join
gather
spread

Sorting by rating and inserting into a new object

sorted_by_rating <- arrange(affairs , rating)

head(sorted_by_rating)
##   affairs gender age yearsmarried children religiousness education
## 1       0 female  27            4      yes             2        18
## 2       0 female  37           15      yes             4        14
## 3       0   male  57           15      yes             3        16
## 4       0 female  52           15      yes             5        17
## 5       0 female  37           15      yes             2        14
## 6       0 female  27            7      yes             2        12
##   occupation rating
## 1          6      1
## 2          3      1
## 3          6      1
## 4          1      1
## 5          1      1
## 6          5      1

Sorting by rating in descending order and inserting into a new object

sorted_by_desc_rating <- arrange(affairs , desc(rating))

head(sorted_by_rating)
##   affairs gender age yearsmarried children religiousness education
## 1       0 female  27            4      yes             2        18
## 2       0 female  37           15      yes             4        14
## 3       0   male  57           15      yes             3        16
## 4       0 female  52           15      yes             5        17
## 5       0 female  37           15      yes             2        14
## 6       0 female  27            7      yes             2        12
##   occupation rating
## 1          6      1
## 2          3      1
## 3          6      1
## 4          1      1
## 5          1      1
## 6          5      1

Sorting by rating in descending order and introducing the word then %>%

sorted_by_desc_rating <- affairs %>% arrange(desc(rating))

head(sorted_by_rating)
##   affairs gender age yearsmarried children religiousness education
## 1       0 female  27            4      yes             2        18
## 2       0 female  37           15      yes             4        14
## 3       0   male  57           15      yes             3        16
## 4       0 female  52           15      yes             5        17
## 5       0 female  37           15      yes             2        14
## 6       0 female  27            7      yes             2        12
##   occupation rating
## 1          6      1
## 2          3      1
## 3          6      1
## 4          1      1
## 5          1      1
## 6          5      1

Time for quick exercises using the affairs data

sorted_by_desc_rating <- affairs %>% arrange(desc(rating))
  1. Create a new object sorted by affairs in descending order
  2. Create a new object with affairs in descending order and religiousness

Filtering data - condition operators

== reads equal
!= not equal
>= greater than or equal
<= less than or equal
> greater than < less than

Filtering where rating = 5

filtered_rating_5 <- affairs %>% 
  filter(rating == 5)

nrow(filtered_rating_5)
## [1] 232
head(filtered_rating_5)
##   affairs gender age yearsmarried children religiousness education
## 1       0   male  57         15.0      yes             5        18
## 2       0 female  32          1.5       no             2        17
## 3       0   male  22          1.5       no             4        14
## 4       0 female  37         15.0      yes             1        17
## 5       0 female  22          1.5       no             2        16
## 6       0 female  27         10.0      yes             2        14
##   occupation rating
## 1          6      5
## 2          5      5
## 3          4      5
## 4          5      5
## 5          5      5
## 6          1      5

Filtering where rating = 5

filtered_rating_5 <- affairs %>% 
  filter(rating == 5)

nrow(filtered_rating_5)
## [1] 232
head(filtered_rating_5)
##   affairs gender age yearsmarried children religiousness education
## 1       0   male  57         15.0      yes             5        18
## 2       0 female  32          1.5       no             2        17
## 3       0   male  22          1.5       no             4        14
## 4       0 female  37         15.0      yes             1        17
## 5       0 female  22          1.5       no             2        16
## 6       0 female  27         10.0      yes             2        14
##   occupation rating
## 1          6      5
## 2          5      5
## 3          4      5
## 4          5      5
## 5          5      5
## 6          1      5

Filtering where rating = 3 with between

filtered_rating_3 <- affairs %>% 
  filter(rating > 2 , rating < 4)

nrow(filtered_rating_3)
## [1] 93
head(filtered_rating_3)
##   affairs gender age yearsmarried children religiousness education
## 1       0   male  22         0.75       no             2        17
## 2       0 female  22         0.75       no             2        12
## 3       0 female  37        15.00      yes             2        18
## 4       0   male  42        15.00      yes             4        20
## 5       0 female  22         1.50       no             4        16
## 6       0   male  52        15.00      yes             5        18
##   occupation rating
## 1          6      3
## 2          1      3
## 3          4      3
## 4          6      3
## 5          5      3
## 6          6      3

Filtering exercise using the affairs data

filtered_rating_3 <- affairs %>% 
  filter(rating > 2 , rating < 4)
  1. Create a new object with the data filtered where children = ‘Yes’
  2. Create a new object with the data filtered where
    1. affairs > 0
    2. religiousness == 5
    3. rating == 5

Creating a new variable: how do you define a cheater?

cheater <- affairs %>% 
  mutate(cheater = ifelse(affairs > 0 , 1 , 0))

head(arrange(cheater , affairs))
##   affairs gender age yearsmarried children religiousness education
## 1       0   male  37        10.00       no             3        18
## 2       0 female  27         4.00       no             4        14
## 3       0 female  32        15.00      yes             1        12
## 4       0   male  57        15.00      yes             5        18
## 5       0   male  22         0.75       no             2        17
## 6       0 female  32         1.50       no             2        17
##   occupation rating cheater
## 1          7      4       0
## 2          6      4       0
## 3          1      4       0
## 4          6      5       0
## 5          6      3       0
## 6          5      5       0
head(arrange(cheater , desc(affairs)))
##   affairs gender age yearsmarried children religiousness education
## 1      12 female  32         10.0      yes             3        17
## 2      12   male  37         15.0      yes             4        14
## 3      12 female  42         15.0      yes             5         9
## 4      12   male  37         10.0      yes             2        20
## 5      12 female  32         15.0      yes             3        14
## 6      12   male  27          1.5      yes             3        17
##   occupation rating cheater
## 1          5      2       1
## 2          5      2       1
## 3          4      1       1
## 4          6      2       1
## 5          1      2       1
## 6          5      4       1

Creating multiple new variables

cheater <- affairs %>% 
  mutate(cheater          = ifelse(affairs > 0 , 1 , 0) , 
         age_when_married = age - yearsmarried)

head(arrange(cheater , affairs))
##   affairs gender age yearsmarried children religiousness education
## 1       0   male  37        10.00       no             3        18
## 2       0 female  27         4.00       no             4        14
## 3       0 female  32        15.00      yes             1        12
## 4       0   male  57        15.00      yes             5        18
## 5       0   male  22         0.75       no             2        17
## 6       0 female  32         1.50       no             2        17
##   occupation rating cheater age_when_married
## 1          7      4       0            27.00
## 2          6      4       0            23.00
## 3          1      4       0            17.00
## 4          6      5       0            42.00
## 5          6      3       0            21.25
## 6          5      5       0            30.50
head(arrange(cheater , desc(affairs)))
##   affairs gender age yearsmarried children religiousness education
## 1      12 female  32         10.0      yes             3        17
## 2      12   male  37         15.0      yes             4        14
## 3      12 female  42         15.0      yes             5         9
## 4      12   male  37         10.0      yes             2        20
## 5      12 female  32         15.0      yes             3        14
## 6      12   male  27          1.5      yes             3        17
##   occupation rating cheater age_when_married
## 1          5      2       1             22.0
## 2          5      2       1             22.0
## 3          4      1       1             27.0
## 4          6      2       1             27.0
## 5          1      2       1             17.0
## 6          5      4       1             25.5
summary(cheater$cheater)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2496  0.0000  1.0000

By the way, I think the age when married is not correct

summary(cheater$age_when_married)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.50   20.50   22.00   24.31   27.00   45.00

Likely the variable age is the age when married rather than the current age.

Hope so…

Exercises using the affairs data

cheater <- affairs %>% 
  mutate(cheater = ifelse(affairs > 0 , 1 , 0))

Create a new object with two new variables:
1. cheater defined as having had at least 6 affairs
2. religiousness into rating (religiousness/rating)

Let’s do multiple verbs, taking advantage of the word %>%

Using the data affairs
THEN
create a new variable cheater
THEN
filtering for religiousness = 5 and cheater = 1

cheater_rel5 <- affairs %>% 
  mutate(cheater = ifelse(affairs > 0 , 1 , 0)) %>% 
  filter(religiousness == 5 ,
         cheater != 0)

nrow(cheater_rel5)
## [1] 13
summary(cheater_rel5)
##     affairs          gender       age         yearsmarried   children
##  Min.   : 1.000   female:7   Min.   :17.50   Min.   : 0.75   no : 1  
##  1st Qu.: 1.000   male  :6   1st Qu.:27.00   1st Qu.:10.00   yes:12  
##  Median : 3.000              Median :37.00   Median :15.00           
##  Mean   : 4.769              Mean   :36.65   Mean   :11.87           
##  3rd Qu.: 7.000              3rd Qu.:47.00   3rd Qu.:15.00           
##  Max.   :12.000              Max.   :57.00   Max.   :15.00           
##  religiousness   education       occupation        rating         cheater 
##  Min.   :5     Min.   : 9.00   Min.   :1.000   Min.   :1.000   Min.   :1  
##  1st Qu.:5     1st Qu.:14.00   1st Qu.:4.000   1st Qu.:3.000   1st Qu.:1  
##  Median :5     Median :16.00   Median :5.000   Median :4.000   Median :1  
##  Mean   :5     Mean   :16.15   Mean   :4.308   Mean   :3.692   Mean   :1  
##  3rd Qu.:5     3rd Qu.:18.00   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:1  
##  Max.   :5     Max.   :20.00   Max.   :6.000   Max.   :5.000   Max.   :1

Summarizing data

You can create a new object with the mean cheater using native R

mean_cheater <- mean(cheater$cheater)

mean_cheater
## [1] 0.249584
str(mean_cheater)
##  num 0.25

In dplyr it would be:

mean_cheater <- cheater %>% summarise(mean_cheater = mean(cheater))

mean_cheater
##   mean_cheater
## 1     0.249584
str(mean_cheater)
## 'data.frame':    1 obs. of  1 variable:
##  $ mean_cheater: num 0.25

We can summarise multiple variables in one statement

summarise_cheater <- cheater %>% 
  summarise(mean_cheater = mean(cheater) , 
            mean_rel     = mean(religiousness))

summarise_cheater
##   mean_cheater mean_rel
## 1     0.249584 3.116473
str(summarise_cheater)
## 'data.frame':    1 obs. of  2 variables:
##  $ mean_cheater: num 0.25
##  $ mean_rel    : num 3.12

Exercise: summarise

summarise_cheater <- cheater %>% 
  summarise(mean_cheater = mean(cheater) , 
            mean_rel     = mean(religiousness))

Create a data frame using the cheater data that calculates:
1. The sum of cheater
2. The mean of rating

Now let’s summarise by group

Cheater proportion by religiousness

cheater_by_rel <- cheater %>% 
  group_by(religiousness) %>% 
  summarise(proportion_cheater = mean(cheater))

cheater_by_rel
## # A tibble: 5 x 2
##   religiousness proportion_cheater
##           <int>              <dbl>
## 1             1              0.417
## 2             2              0.250
## 3             3              0.333
## 4             4              0.174
## 5             5              0.186

group_by + mutate = ?

Cheater proportion by religiousness

cheater_by_rel <- cheater %>% 
  group_by(religiousness) %>% 
  mutate(proportion_cheater = mean(cheater)) %>% 
  select(1:3 , religiousness, cheater , proportion_cheater) %>% 
  ungroup()

head(cheater_by_rel)
## # A tibble: 6 x 6
##   affairs gender   age religiousness cheater proportion_cheater
##     <dbl> <fct>  <dbl>         <int>   <dbl>              <dbl>
## 1       0 male    37.0             3       0              0.333
## 2       0 female  27.0             4       0              0.174
## 3       0 female  32.0             1       0              0.417
## 4       0 male    57.0             5       0              0.186
## 5       0 male    22.0             2       0              0.250
## 6       0 female  32.0             2       0              0.250

This has many uses

The verb select is extremely powerful

Check out the documentation to see the multiple ways you can select variables

?dplyr::select

cheater %>% 
  select(starts_with('r'))
##     religiousness rating
## 1               3      4
## 2               4      4
## 3               1      4
## 4               5      5
## 5               2      3
## 6               2      5
## 7               2      3
## 8               2      4
## 9               4      2
## 10              4      5
## 11              2      2
## 12              4      4
## 13              5      4
## 14              2      4
## 15              4      4
## 16              1      5
## 17              2      3
## 18              3      4
## 19              2      5
## 20              2      5
## 21              2      5
## 22              2      5
## 23              4      4
## 24              3      5
## 25              2      4
## 26              2      5
## 27              4      5
## 28              5      4
## 29              3      5
## 30              3      4
## 31              4      3
## 32              3      5
## 33              4      4
## 34              5      4
## 35              1      4
## 36              4      3
## 37              3      4
## 38              4      5
## 39              1      5
## 40              3      5
## 41              5      5
## 42              5      3
## 43              5      4
## 44              2      1
## 45              5      3
## 46              3      5
## 47              4      5
## 48              2      4
## 49              4      5
## 50              2      4
## 51              5      5
## 52              2      4
## 53              5      5
## 54              4      4
## 55              1      5
## 56              4      1
## 57              5      4
## 58              4      5
## 59              4      4
## 60              4      4
## 61              5      2
## 62              3      4
## 63              2      5
## 64              2      5
## 65              4      4
## 66              2      5
## 67              4      4
## 68              3      5
## 69              5      5
## 70              2      4
## 71              3      5
## 72              5      5
## 73              2      5
## 74              4      3
## 75              1      5
## 76              4      5
## 77              3      5
## 78              3      5
## 79              3      5
## 80              2      2
## 81              3      5
## 82              4      5
## 83              2      4
## 84              4      3
## 85              4      5
## 86              1      4
## 87              4      5
## 88              3      2
## 89              3      5
## 90              3      4
## 91              4      4
## 92              2      3
## 93              4      4
## 94              5      3
## 95              1      4
## 96              5      5
## 97              3      1
## 98              1      4
## 99              3      5
## 100             4      5
## 101             5      5
## 102             2      4
## 103             4      5
## 104             5      5
## 105             4      4
## 106             4      4
## 107             4      4
## 108             4      4
## 109             3      3
## 110             2      4
## 111             4      5
## 112             4      5
## 113             5      5
## 114             5      4
## 115             3      4
## 116             4      5
## 117             4      2
## 118             5      4
## 119             4      5
## 120             4      4
## 121             2      5
## 122             5      4
## 123             4      3
## 124             2      2
## 125             3      5
## 126             5      5
## 127             2      5
## 128             4      5
## 129             2      5
## 130             2      5
## 131             4      3
## 132             4      5
## 133             2      5
## 134             2      3
## 135             3      3
## 136             3      4
## 137             2      5
## 138             2      4
## 139             4      5
## 140             1      4
## 141             2      4
## 142             5      4
## 143             5      3
## 144             2      4
## 145             3      3
## 146             1      4
## 147             4      4
## 148             5      3
## 149             4      4
## 150             1      4
## 151             4      3
## 152             2      3
## 153             2      3
## 154             4      3
## 155             2      3
## 156             4      3
## 157             5      1
## 158             4      2
## 159             4      3
## 160             2      5
## 161             2      5
## 162             2      5
## 163             5      5
## 164             5      4
## 165             3      4
## 166             4      2
## 167             2      2
## 168             4      4
## 169             3      4
## 170             4      2
## 171             4      5
## 172             4      5
## 173             4      5
## 174             3      4
## 175             2      5
## 176             2      2
## 177             3      4
## 178             2      4
## 179             4      3
## 180             4      4
## 181             5      2
## 182             4      5
## 183             3      3
## 184             4      5
## 185             3      5
## 186             4      5
## 187             3      5
## 188             2      5
## 189             4      5
## 190             1      5
## 191             3      5
## 192             4      4
## 193             4      4
## 194             4      4
## 195             2      3
## 196             1      4
## 197             4      3
## 198             2      5
## 199             4      2
## 200             4      5
## 201             5      5
## 202             2      5
## 203             3      5
## 204             2      3
## 205             2      2
## 206             2      3
## 207             4      2
## 208             1      5
## 209             2      4
## 210             4      3
## 211             3      3
## 212             2      5
## 213             2      2
## 214             2      5
## 215             2      5
## 216             5      5
## 217             4      5
## 218             4      3
## 219             1      4
## 220             5      5
## 221             3      3
## 222             4      5
## 223             4      5
## 224             4      4
## 225             3      4
## 226             1      5
## 227             4      4
## 228             2      4
## 229             4      5
## 230             3      5
## 231             5      4
## 232             4      5
## 233             4      5
## 234             4      4
## 235             4      5
## 236             4      3
## 237             5      5
## 238             5      5
## 239             3      5
## 240             4      4
## 241             2      4
## 242             3      4
## 243             1      5
## 244             3      5
## 245             4      4
## 246             3      4
## 247             3      3
## 248             4      5
## 249             3      4
## 250             5      4
## 251             4      3
## 252             2      4
## 253             4      5
## 254             4      4
## 255             1      5
## 256             3      4
## 257             2      5
## 258             2      4
## 259             4      4
## 260             2      5
## 261             3      5
## 262             4      3
## 263             1      4
## 264             2      5
## 265             3      3
## 266             2      5
## 267             2      1
## 268             2      5
## 269             4      5
## 270             3      5
## 271             4      5
## 272             3      5
## 273             2      3
## 274             2      3
## 275             4      4
## 276             2      1
## 277             5      3
## 278             2      5
## 279             4      4
## 280             1      5
## 281             4      4
## 282             4      4
## 283             4      4
## 284             3      3
## 285             3      4
## 286             3      2
## 287             3      5
## 288             3      2
## 289             3      2
## 290             2      4
## 291             3      4
## 292             2      5
## 293             5      4
## 294             4      3
## 295             4      5
## 296             2      3
## 297             4      5
## 298             2      5
## 299             4      3
## 300             5      4
## 301             2      2
## 302             2      5
## 303             2      4
## 304             3      5
## 305             4      4
## 306             2      4
## 307             4      5
## 308             5      3
## 309             4      3
## 310             3      5
## 311             2      2
## 312             4      5
## 313             5      5
## 314             1      4
## 315             5      3
## 316             4      5
## 317             2      5
## 318             2      4
## 319             2      4
## 320             4      4
## 321             3      5
## 322             2      5
## 323             4      4
## 324             4      5
## 325             2      5
## 326             1      5
## 327             3      4
## 328             2      5
## 329             5      5
## 330             4      3
## 331             2      4
## 332             4      4
## 333             4      4
## 334             5      5
## 335             3      5
## 336             4      2
## 337             4      5
## 338             4      5
## 339             3      5
## 340             2      5
## 341             2      5
## 342             2      3
## 343             2      4
## 344             2      5
## 345             4      4
## 346             4      4
## 347             4      5
## 348             3      3
## 349             4      3
## 350             5      2
## 351             2      5
## 352             3      5
## 353             5      4
## 354             4      5
## 355             4      5
## 356             2      5
## 357             3      3
## 358             2      5
## 359             4      4
## 360             3      5
## 361             3      5
## 362             5      5
## 363             5      4
## 364             3      5
## 365             4      5
## 366             3      5
## 367             4      5
## 368             4      5
## 369             1      4
## 370             3      4
## 371             4      2
## 372             5      2
## 373             2      5
## 374             4      3
## 375             4      5
## 376             4      4
## 377             3      5
## 378             3      4
## 379             3      5
## 380             2      5
## 381             1      5
## 382             4      5
## 383             4      5
## 384             4      4
## 385             2      2
## 386             4      5
## 387             2      4
## 388             2      4
## 389             2      5
## 390             3      4
## 391             5      5
## 392             4      4
## 393             3      2
## 394             3      2
## 395             2      2
## 396             5      5
## 397             1      5
## 398             1      5
## 399             4      5
## 400             2      3
## 401             5      5
## 402             4      5
## 403             5      4
## 404             2      3
## 405             4      1
## 406             4      5
## 407             2      4
## 408             4      5
## 409             2      4
## 410             4      5
## 411             2      3
## 412             2      5
## 413             4      3
## 414             4      4
## 415             3      5
## 416             2      3
## 417             4      5
## 418             4      3
## 419             3      5
## 420             4      5
## 421             4      5
## 422             2      4
## 423             2      1
## 424             5      4
## 425             2      4
## 426             4      5
## 427             1      5
## 428             4      4
## 429             5      3
## 430             4      4
## 431             2      4
## 432             3      5
## 433             4      4
## 434             5      5
## 435             2      2
## 436             3      4
## 437             2      4
## 438             4      4
## 439             4      3
## 440             2      2
## 441             3      3
## 442             3      5
## 443             3      4
## 444             2      5
## 445             4      4
## 446             4      5
## 447             2      5
## 448             4      4
## 449             2      2
## 450             3      4
## 451             2      4
## 452             3      4
## 453             3      5
## 454             5      2
## 455             3      2
## 456             4      5
## 457             2      5
## 458             4      2
## 459             2      4
## 460             2      4
## 461             4      2
## 462             4      2
## 463             3      4
## 464             5      1
## 465             2      2
## 466             3      2
## 467             1      5
## 468             2      3
## 469             3      5
## 470             4      5
## 471             5      5
## 472             3      4
## 473             5      4
## 474             3      4
## 475             4      2
## 476             4      4
## 477             4      3
## 478             2      2
## 479             4      4
## 480             3      3
## 481             4      4
## 482             2      2
## 483             5      5
## 484             4      5
## 485             2      4
## 486             2      4
## 487             1      3
## 488             2      5
## 489             2      4
## 490             1      3
## 491             2      3
## 492             3      5
## 493             3      4
## 494             1      2
## 495             3      5
## 496             4      5
## 497             4      1
## 498             3      4
## 499             4      5
## 500             3      4
## 501             5      4
## 502             2      4
## 503             2      4
## 504             2      5
## 505             5      3
## 506             3      3
## 507             4      4
## 508             3      4
## 509             2      1
## 510             1      2
## 511             3      4
## 512             4      4
## 513             1      4
## 514             3      2
## 515             1      4
## 516             4      4
## 517             1      4
## 518             4      2
## 519             1      3
## 520             3      1
## 521             3      4
## 522             3      3
## 523             2      2
## 524             4      5
## 525             3      4
## 526             1      5
## 527             3      3
## 528             2      5
## 529             3      3
## 530             2      4
## 531             4      2
## 532             3      4
## 533             1      3
## 534             2      3
## 535             3      3
## 536             5      5
## 537             2      2
## 538             1      3
## 539             4      2
## 540             2      4
## 541             2      3
## 542             3      4
## 543             4      3
## 544             4      5
## 545             2      2
## 546             5      3
## 547             3      1
## 548             2      2
## 549             3      4
## 550             2      2
## 551             3      1
## 552             5      5
## 553             4      4
## 554             2      3
## 555             3      3
## 556             5      2
## 557             2      4
## 558             2      4
## 559             1      2
## 560             3      3
## 561             4      1
## 562             2      5
## 563             2      2
## 564             2      4
## 565             2      5
## 566             4      5
## 567             3      3
## 568             4      5
## 569             1      5
## 570             2      4
## 571             1      1
## 572             2      5
## 573             5      4
## 574             1      5
## 575             4      4
## 576             3      2
## 577             2      4
## 578             4      4
## 579             3      2
## 580             3      3
## 581             1      4
## 582             4      3
## 583             3      4
## 584             2      3
## 585             3      2
## 586             5      5
## 587             1      5
## 588             1      5
## 589             4      4
## 590             3      2
## 591             4      2
## 592             3      2
## 593             2      2
## 594             2      4
## 595             4      3
## 596             3      2
## 597             1      5
## 598             2      4
## 599             2      5
## 600             3      2
## 601             3      5

Exercises: group_by

  1. Create a data frame grouped by children (Yes/No), summarizing:
    • mean religiousness
    • mean rating
    • mean cheater
  2. Create the same table using the mutate statement

By now, renaming is all but covered

Renaming religiousness to rel

cheater_renamed <- cheater %>% 
  rename(rel = religiousness)

summary(cheater_renamed)
##     affairs          gender         age         yearsmarried    children 
##  Min.   : 0.000   female:315   Min.   :17.50   Min.   : 0.125   no :171  
##  1st Qu.: 0.000   male  :286   1st Qu.:27.00   1st Qu.: 4.000   yes:430  
##  Median : 0.000                Median :32.00   Median : 7.000            
##  Mean   : 1.456                Mean   :32.49   Mean   : 8.178            
##  3rd Qu.: 0.000                3rd Qu.:37.00   3rd Qu.:15.000            
##  Max.   :12.000                Max.   :57.00   Max.   :15.000            
##       rel          education       occupation        rating     
##  Min.   :1.000   Min.   : 9.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:14.00   1st Qu.:3.000   1st Qu.:3.000  
##  Median :3.000   Median :16.00   Median :5.000   Median :4.000  
##  Mean   :3.116   Mean   :16.17   Mean   :4.195   Mean   :3.932  
##  3rd Qu.:4.000   3rd Qu.:18.00   3rd Qu.:6.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :20.00   Max.   :7.000   Max.   :5.000  
##     cheater      
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.2496  
##  3rd Qu.:0.0000  
##  Max.   :1.0000

inner_join

band_members
## # A tibble: 3 x 2
##   name  band   
##   <chr> <chr>  
## 1 Mick  Stones 
## 2 John  Beatles
## 3 Paul  Beatles
band_instruments
## # A tibble: 3 x 2
##   name  plays 
##   <chr> <chr> 
## 1 John  guitar
## 2 Paul  bass  
## 3 Keith guitar
inner <- inner_join(band_members , band_instruments)
## Joining, by = "name"
inner
## # A tibble: 2 x 3
##   name  band    plays 
##   <chr> <chr>   <chr> 
## 1 John  Beatles guitar
## 2 Paul  Beatles bass

left_join

left <- left_join(band_members , band_instruments)
## Joining, by = "name"
left
## # A tibble: 3 x 3
##   name  band    plays 
##   <chr> <chr>   <chr> 
## 1 Mick  Stones  <NA>  
## 2 John  Beatles guitar
## 3 Paul  Beatles bass

semi_join

semi <- semi_join(band_members , band_instruments)
## Joining, by = "name"
semi
## # A tibble: 2 x 2
##   name  band   
##   <chr> <chr>  
## 1 John  Beatles
## 2 Paul  Beatles

anti_join

anti <- anti_join(band_members , band_instruments)
## Joining, by = "name"
anti
## # A tibble: 1 x 2
##   name  band  
##   <chr> <chr> 
## 1 Mick  Stones

Transposing: gather

summarise_cheater <- cheater %>% 
  group_by(children) %>% 
  summarise(mean_cheater = mean(cheater) , 
            mean_rel     = mean(religiousness))

summarise_cheater
## # A tibble: 2 x 3
##   children mean_cheater mean_rel
##   <fct>           <dbl>    <dbl>
## 1 no              0.158     2.88
## 2 yes             0.286     3.21
gathered <- summarise_cheater %>% 
  gather(metric , value , -children)

gathered
## # A tibble: 4 x 3
##   children metric       value
##   <fct>    <chr>        <dbl>
## 1 no       mean_cheater 0.158
## 2 yes      mean_cheater 0.286
## 3 no       mean_rel     2.88 
## 4 yes      mean_rel     3.21

Transposing: spreading

gathered
## # A tibble: 4 x 3
##   children metric       value
##   <fct>    <chr>        <dbl>
## 1 no       mean_cheater 0.158
## 2 yes      mean_cheater 0.286
## 3 no       mean_rel     2.88 
## 4 yes      mean_rel     3.21
spreading <- gathered %>% 
  spread(metric , value)

spreading
## # A tibble: 2 x 3
##   children mean_cheater mean_rel
## * <fct>           <dbl>    <dbl>
## 1 no              0.158     2.88
## 2 yes             0.286     3.21

Next time…