Illya Mowerman
2/5/2018
Data Wrangling with tidyverse
library(tidyverse)
library(AER)
data('Affairs')
affairs <- Affairssummary(affairs)## affairs gender age yearsmarried children
## Min. : 0.000 female:315 Min. :17.50 Min. : 0.125 no :171
## 1st Qu.: 0.000 male :286 1st Qu.:27.00 1st Qu.: 4.000 yes:430
## Median : 0.000 Median :32.00 Median : 7.000
## Mean : 1.456 Mean :32.49 Mean : 8.178
## 3rd Qu.: 0.000 3rd Qu.:37.00 3rd Qu.:15.000
## Max. :12.000 Max. :57.00 Max. :15.000
## religiousness education occupation rating
## Min. :1.000 Min. : 9.00 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:14.00 1st Qu.:3.000 1st Qu.:3.000
## Median :3.000 Median :16.00 Median :5.000 Median :4.000
## Mean :3.116 Mean :16.17 Mean :4.195 Mean :3.932
## 3rd Qu.:4.000 3rd Qu.:18.00 3rd Qu.:6.000 3rd Qu.:5.000
## Max. :5.000 Max. :20.00 Max. :7.000 Max. :5.000
arrange
filter
mutate
summarise
group_by
select
rename
inner_join
left_join
semi_join
anti_join
gather
spread
sorted_by_rating <- arrange(affairs , rating)
head(sorted_by_rating)## affairs gender age yearsmarried children religiousness education
## 1 0 female 27 4 yes 2 18
## 2 0 female 37 15 yes 4 14
## 3 0 male 57 15 yes 3 16
## 4 0 female 52 15 yes 5 17
## 5 0 female 37 15 yes 2 14
## 6 0 female 27 7 yes 2 12
## occupation rating
## 1 6 1
## 2 3 1
## 3 6 1
## 4 1 1
## 5 1 1
## 6 5 1
sorted_by_desc_rating <- arrange(affairs , desc(rating))
head(sorted_by_rating)## affairs gender age yearsmarried children religiousness education
## 1 0 female 27 4 yes 2 18
## 2 0 female 37 15 yes 4 14
## 3 0 male 57 15 yes 3 16
## 4 0 female 52 15 yes 5 17
## 5 0 female 37 15 yes 2 14
## 6 0 female 27 7 yes 2 12
## occupation rating
## 1 6 1
## 2 3 1
## 3 6 1
## 4 1 1
## 5 1 1
## 6 5 1
sorted_by_desc_rating <- affairs %>% arrange(desc(rating))
head(sorted_by_rating)## affairs gender age yearsmarried children religiousness education
## 1 0 female 27 4 yes 2 18
## 2 0 female 37 15 yes 4 14
## 3 0 male 57 15 yes 3 16
## 4 0 female 52 15 yes 5 17
## 5 0 female 37 15 yes 2 14
## 6 0 female 27 7 yes 2 12
## occupation rating
## 1 6 1
## 2 3 1
## 3 6 1
## 4 1 1
## 5 1 1
## 6 5 1
sorted_by_desc_rating <- affairs %>% arrange(desc(rating))== reads equal
!= not equal
>= greater than or equal
<= less than or equal
> greater than < less than
filtered_rating_5 <- affairs %>%
filter(rating == 5)
nrow(filtered_rating_5)## [1] 232
head(filtered_rating_5)## affairs gender age yearsmarried children religiousness education
## 1 0 male 57 15.0 yes 5 18
## 2 0 female 32 1.5 no 2 17
## 3 0 male 22 1.5 no 4 14
## 4 0 female 37 15.0 yes 1 17
## 5 0 female 22 1.5 no 2 16
## 6 0 female 27 10.0 yes 2 14
## occupation rating
## 1 6 5
## 2 5 5
## 3 4 5
## 4 5 5
## 5 5 5
## 6 1 5
filtered_rating_5 <- affairs %>%
filter(rating == 5)
nrow(filtered_rating_5)## [1] 232
head(filtered_rating_5)## affairs gender age yearsmarried children religiousness education
## 1 0 male 57 15.0 yes 5 18
## 2 0 female 32 1.5 no 2 17
## 3 0 male 22 1.5 no 4 14
## 4 0 female 37 15.0 yes 1 17
## 5 0 female 22 1.5 no 2 16
## 6 0 female 27 10.0 yes 2 14
## occupation rating
## 1 6 5
## 2 5 5
## 3 4 5
## 4 5 5
## 5 5 5
## 6 1 5
filtered_rating_3 <- affairs %>%
filter(rating > 2 , rating < 4)
nrow(filtered_rating_3)## [1] 93
head(filtered_rating_3)## affairs gender age yearsmarried children religiousness education
## 1 0 male 22 0.75 no 2 17
## 2 0 female 22 0.75 no 2 12
## 3 0 female 37 15.00 yes 2 18
## 4 0 male 42 15.00 yes 4 20
## 5 0 female 22 1.50 no 4 16
## 6 0 male 52 15.00 yes 5 18
## occupation rating
## 1 6 3
## 2 1 3
## 3 4 3
## 4 6 3
## 5 5 3
## 6 6 3
filtered_rating_3 <- affairs %>%
filter(rating > 2 , rating < 4)cheater <- affairs %>%
mutate(cheater = ifelse(affairs > 0 , 1 , 0))
head(arrange(cheater , affairs))## affairs gender age yearsmarried children religiousness education
## 1 0 male 37 10.00 no 3 18
## 2 0 female 27 4.00 no 4 14
## 3 0 female 32 15.00 yes 1 12
## 4 0 male 57 15.00 yes 5 18
## 5 0 male 22 0.75 no 2 17
## 6 0 female 32 1.50 no 2 17
## occupation rating cheater
## 1 7 4 0
## 2 6 4 0
## 3 1 4 0
## 4 6 5 0
## 5 6 3 0
## 6 5 5 0
head(arrange(cheater , desc(affairs)))## affairs gender age yearsmarried children religiousness education
## 1 12 female 32 10.0 yes 3 17
## 2 12 male 37 15.0 yes 4 14
## 3 12 female 42 15.0 yes 5 9
## 4 12 male 37 10.0 yes 2 20
## 5 12 female 32 15.0 yes 3 14
## 6 12 male 27 1.5 yes 3 17
## occupation rating cheater
## 1 5 2 1
## 2 5 2 1
## 3 4 1 1
## 4 6 2 1
## 5 1 2 1
## 6 5 4 1
cheater <- affairs %>%
mutate(cheater = ifelse(affairs > 0 , 1 , 0) ,
age_when_married = age - yearsmarried)
head(arrange(cheater , affairs))## affairs gender age yearsmarried children religiousness education
## 1 0 male 37 10.00 no 3 18
## 2 0 female 27 4.00 no 4 14
## 3 0 female 32 15.00 yes 1 12
## 4 0 male 57 15.00 yes 5 18
## 5 0 male 22 0.75 no 2 17
## 6 0 female 32 1.50 no 2 17
## occupation rating cheater age_when_married
## 1 7 4 0 27.00
## 2 6 4 0 23.00
## 3 1 4 0 17.00
## 4 6 5 0 42.00
## 5 6 3 0 21.25
## 6 5 5 0 30.50
head(arrange(cheater , desc(affairs)))## affairs gender age yearsmarried children religiousness education
## 1 12 female 32 10.0 yes 3 17
## 2 12 male 37 15.0 yes 4 14
## 3 12 female 42 15.0 yes 5 9
## 4 12 male 37 10.0 yes 2 20
## 5 12 female 32 15.0 yes 3 14
## 6 12 male 27 1.5 yes 3 17
## occupation rating cheater age_when_married
## 1 5 2 1 22.0
## 2 5 2 1 22.0
## 3 4 1 1 27.0
## 4 6 2 1 27.0
## 5 1 2 1 17.0
## 6 5 4 1 25.5
summary(cheater$cheater)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2496 0.0000 1.0000
summary(cheater$age_when_married)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.50 20.50 22.00 24.31 27.00 45.00
Likely the variable age is the age when married rather than the current age.
Hope so…
cheater <- affairs %>%
mutate(cheater = ifelse(affairs > 0 , 1 , 0))Create a new object with two new variables:
1. cheater defined as having had at least 6 affairs
2. religiousness into rating (religiousness/rating)
Using the data affairs
THEN
create a new variable cheater
THEN
filtering for religiousness = 5 and cheater = 1
cheater_rel5 <- affairs %>%
mutate(cheater = ifelse(affairs > 0 , 1 , 0)) %>%
filter(religiousness == 5 ,
cheater != 0)
nrow(cheater_rel5)## [1] 13
summary(cheater_rel5)## affairs gender age yearsmarried children
## Min. : 1.000 female:7 Min. :17.50 Min. : 0.75 no : 1
## 1st Qu.: 1.000 male :6 1st Qu.:27.00 1st Qu.:10.00 yes:12
## Median : 3.000 Median :37.00 Median :15.00
## Mean : 4.769 Mean :36.65 Mean :11.87
## 3rd Qu.: 7.000 3rd Qu.:47.00 3rd Qu.:15.00
## Max. :12.000 Max. :57.00 Max. :15.00
## religiousness education occupation rating cheater
## Min. :5 Min. : 9.00 Min. :1.000 Min. :1.000 Min. :1
## 1st Qu.:5 1st Qu.:14.00 1st Qu.:4.000 1st Qu.:3.000 1st Qu.:1
## Median :5 Median :16.00 Median :5.000 Median :4.000 Median :1
## Mean :5 Mean :16.15 Mean :4.308 Mean :3.692 Mean :1
## 3rd Qu.:5 3rd Qu.:18.00 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:1
## Max. :5 Max. :20.00 Max. :6.000 Max. :5.000 Max. :1
You can create a new object with the mean cheater using native R
mean_cheater <- mean(cheater$cheater)
mean_cheater## [1] 0.249584
str(mean_cheater)## num 0.25
In dplyr it would be:
mean_cheater <- cheater %>% summarise(mean_cheater = mean(cheater))
mean_cheater## mean_cheater
## 1 0.249584
str(mean_cheater)## 'data.frame': 1 obs. of 1 variable:
## $ mean_cheater: num 0.25
summarise_cheater <- cheater %>%
summarise(mean_cheater = mean(cheater) ,
mean_rel = mean(religiousness))
summarise_cheater## mean_cheater mean_rel
## 1 0.249584 3.116473
str(summarise_cheater)## 'data.frame': 1 obs. of 2 variables:
## $ mean_cheater: num 0.25
## $ mean_rel : num 3.12
summarise_cheater <- cheater %>%
summarise(mean_cheater = mean(cheater) ,
mean_rel = mean(religiousness))Create a data frame using the cheater data that calculates:
1. The sum of cheater
2. The mean of rating
Cheater proportion by religiousness
cheater_by_rel <- cheater %>%
group_by(religiousness) %>%
summarise(proportion_cheater = mean(cheater))
cheater_by_rel## # A tibble: 5 x 2
## religiousness proportion_cheater
## <int> <dbl>
## 1 1 0.417
## 2 2 0.250
## 3 3 0.333
## 4 4 0.174
## 5 5 0.186
Cheater proportion by religiousness
cheater_by_rel <- cheater %>%
group_by(religiousness) %>%
mutate(proportion_cheater = mean(cheater)) %>%
select(1:3 , religiousness, cheater , proportion_cheater) %>%
ungroup()
head(cheater_by_rel)## # A tibble: 6 x 6
## affairs gender age religiousness cheater proportion_cheater
## <dbl> <fct> <dbl> <int> <dbl> <dbl>
## 1 0 male 37.0 3 0 0.333
## 2 0 female 27.0 4 0 0.174
## 3 0 female 32.0 1 0 0.417
## 4 0 male 57.0 5 0 0.186
## 5 0 male 22.0 2 0 0.250
## 6 0 female 32.0 2 0 0.250
This has many uses
Check out the documentation to see the multiple ways you can select variables
?dplyr::select
cheater %>%
select(starts_with('r'))## religiousness rating
## 1 3 4
## 2 4 4
## 3 1 4
## 4 5 5
## 5 2 3
## 6 2 5
## 7 2 3
## 8 2 4
## 9 4 2
## 10 4 5
## 11 2 2
## 12 4 4
## 13 5 4
## 14 2 4
## 15 4 4
## 16 1 5
## 17 2 3
## 18 3 4
## 19 2 5
## 20 2 5
## 21 2 5
## 22 2 5
## 23 4 4
## 24 3 5
## 25 2 4
## 26 2 5
## 27 4 5
## 28 5 4
## 29 3 5
## 30 3 4
## 31 4 3
## 32 3 5
## 33 4 4
## 34 5 4
## 35 1 4
## 36 4 3
## 37 3 4
## 38 4 5
## 39 1 5
## 40 3 5
## 41 5 5
## 42 5 3
## 43 5 4
## 44 2 1
## 45 5 3
## 46 3 5
## 47 4 5
## 48 2 4
## 49 4 5
## 50 2 4
## 51 5 5
## 52 2 4
## 53 5 5
## 54 4 4
## 55 1 5
## 56 4 1
## 57 5 4
## 58 4 5
## 59 4 4
## 60 4 4
## 61 5 2
## 62 3 4
## 63 2 5
## 64 2 5
## 65 4 4
## 66 2 5
## 67 4 4
## 68 3 5
## 69 5 5
## 70 2 4
## 71 3 5
## 72 5 5
## 73 2 5
## 74 4 3
## 75 1 5
## 76 4 5
## 77 3 5
## 78 3 5
## 79 3 5
## 80 2 2
## 81 3 5
## 82 4 5
## 83 2 4
## 84 4 3
## 85 4 5
## 86 1 4
## 87 4 5
## 88 3 2
## 89 3 5
## 90 3 4
## 91 4 4
## 92 2 3
## 93 4 4
## 94 5 3
## 95 1 4
## 96 5 5
## 97 3 1
## 98 1 4
## 99 3 5
## 100 4 5
## 101 5 5
## 102 2 4
## 103 4 5
## 104 5 5
## 105 4 4
## 106 4 4
## 107 4 4
## 108 4 4
## 109 3 3
## 110 2 4
## 111 4 5
## 112 4 5
## 113 5 5
## 114 5 4
## 115 3 4
## 116 4 5
## 117 4 2
## 118 5 4
## 119 4 5
## 120 4 4
## 121 2 5
## 122 5 4
## 123 4 3
## 124 2 2
## 125 3 5
## 126 5 5
## 127 2 5
## 128 4 5
## 129 2 5
## 130 2 5
## 131 4 3
## 132 4 5
## 133 2 5
## 134 2 3
## 135 3 3
## 136 3 4
## 137 2 5
## 138 2 4
## 139 4 5
## 140 1 4
## 141 2 4
## 142 5 4
## 143 5 3
## 144 2 4
## 145 3 3
## 146 1 4
## 147 4 4
## 148 5 3
## 149 4 4
## 150 1 4
## 151 4 3
## 152 2 3
## 153 2 3
## 154 4 3
## 155 2 3
## 156 4 3
## 157 5 1
## 158 4 2
## 159 4 3
## 160 2 5
## 161 2 5
## 162 2 5
## 163 5 5
## 164 5 4
## 165 3 4
## 166 4 2
## 167 2 2
## 168 4 4
## 169 3 4
## 170 4 2
## 171 4 5
## 172 4 5
## 173 4 5
## 174 3 4
## 175 2 5
## 176 2 2
## 177 3 4
## 178 2 4
## 179 4 3
## 180 4 4
## 181 5 2
## 182 4 5
## 183 3 3
## 184 4 5
## 185 3 5
## 186 4 5
## 187 3 5
## 188 2 5
## 189 4 5
## 190 1 5
## 191 3 5
## 192 4 4
## 193 4 4
## 194 4 4
## 195 2 3
## 196 1 4
## 197 4 3
## 198 2 5
## 199 4 2
## 200 4 5
## 201 5 5
## 202 2 5
## 203 3 5
## 204 2 3
## 205 2 2
## 206 2 3
## 207 4 2
## 208 1 5
## 209 2 4
## 210 4 3
## 211 3 3
## 212 2 5
## 213 2 2
## 214 2 5
## 215 2 5
## 216 5 5
## 217 4 5
## 218 4 3
## 219 1 4
## 220 5 5
## 221 3 3
## 222 4 5
## 223 4 5
## 224 4 4
## 225 3 4
## 226 1 5
## 227 4 4
## 228 2 4
## 229 4 5
## 230 3 5
## 231 5 4
## 232 4 5
## 233 4 5
## 234 4 4
## 235 4 5
## 236 4 3
## 237 5 5
## 238 5 5
## 239 3 5
## 240 4 4
## 241 2 4
## 242 3 4
## 243 1 5
## 244 3 5
## 245 4 4
## 246 3 4
## 247 3 3
## 248 4 5
## 249 3 4
## 250 5 4
## 251 4 3
## 252 2 4
## 253 4 5
## 254 4 4
## 255 1 5
## 256 3 4
## 257 2 5
## 258 2 4
## 259 4 4
## 260 2 5
## 261 3 5
## 262 4 3
## 263 1 4
## 264 2 5
## 265 3 3
## 266 2 5
## 267 2 1
## 268 2 5
## 269 4 5
## 270 3 5
## 271 4 5
## 272 3 5
## 273 2 3
## 274 2 3
## 275 4 4
## 276 2 1
## 277 5 3
## 278 2 5
## 279 4 4
## 280 1 5
## 281 4 4
## 282 4 4
## 283 4 4
## 284 3 3
## 285 3 4
## 286 3 2
## 287 3 5
## 288 3 2
## 289 3 2
## 290 2 4
## 291 3 4
## 292 2 5
## 293 5 4
## 294 4 3
## 295 4 5
## 296 2 3
## 297 4 5
## 298 2 5
## 299 4 3
## 300 5 4
## 301 2 2
## 302 2 5
## 303 2 4
## 304 3 5
## 305 4 4
## 306 2 4
## 307 4 5
## 308 5 3
## 309 4 3
## 310 3 5
## 311 2 2
## 312 4 5
## 313 5 5
## 314 1 4
## 315 5 3
## 316 4 5
## 317 2 5
## 318 2 4
## 319 2 4
## 320 4 4
## 321 3 5
## 322 2 5
## 323 4 4
## 324 4 5
## 325 2 5
## 326 1 5
## 327 3 4
## 328 2 5
## 329 5 5
## 330 4 3
## 331 2 4
## 332 4 4
## 333 4 4
## 334 5 5
## 335 3 5
## 336 4 2
## 337 4 5
## 338 4 5
## 339 3 5
## 340 2 5
## 341 2 5
## 342 2 3
## 343 2 4
## 344 2 5
## 345 4 4
## 346 4 4
## 347 4 5
## 348 3 3
## 349 4 3
## 350 5 2
## 351 2 5
## 352 3 5
## 353 5 4
## 354 4 5
## 355 4 5
## 356 2 5
## 357 3 3
## 358 2 5
## 359 4 4
## 360 3 5
## 361 3 5
## 362 5 5
## 363 5 4
## 364 3 5
## 365 4 5
## 366 3 5
## 367 4 5
## 368 4 5
## 369 1 4
## 370 3 4
## 371 4 2
## 372 5 2
## 373 2 5
## 374 4 3
## 375 4 5
## 376 4 4
## 377 3 5
## 378 3 4
## 379 3 5
## 380 2 5
## 381 1 5
## 382 4 5
## 383 4 5
## 384 4 4
## 385 2 2
## 386 4 5
## 387 2 4
## 388 2 4
## 389 2 5
## 390 3 4
## 391 5 5
## 392 4 4
## 393 3 2
## 394 3 2
## 395 2 2
## 396 5 5
## 397 1 5
## 398 1 5
## 399 4 5
## 400 2 3
## 401 5 5
## 402 4 5
## 403 5 4
## 404 2 3
## 405 4 1
## 406 4 5
## 407 2 4
## 408 4 5
## 409 2 4
## 410 4 5
## 411 2 3
## 412 2 5
## 413 4 3
## 414 4 4
## 415 3 5
## 416 2 3
## 417 4 5
## 418 4 3
## 419 3 5
## 420 4 5
## 421 4 5
## 422 2 4
## 423 2 1
## 424 5 4
## 425 2 4
## 426 4 5
## 427 1 5
## 428 4 4
## 429 5 3
## 430 4 4
## 431 2 4
## 432 3 5
## 433 4 4
## 434 5 5
## 435 2 2
## 436 3 4
## 437 2 4
## 438 4 4
## 439 4 3
## 440 2 2
## 441 3 3
## 442 3 5
## 443 3 4
## 444 2 5
## 445 4 4
## 446 4 5
## 447 2 5
## 448 4 4
## 449 2 2
## 450 3 4
## 451 2 4
## 452 3 4
## 453 3 5
## 454 5 2
## 455 3 2
## 456 4 5
## 457 2 5
## 458 4 2
## 459 2 4
## 460 2 4
## 461 4 2
## 462 4 2
## 463 3 4
## 464 5 1
## 465 2 2
## 466 3 2
## 467 1 5
## 468 2 3
## 469 3 5
## 470 4 5
## 471 5 5
## 472 3 4
## 473 5 4
## 474 3 4
## 475 4 2
## 476 4 4
## 477 4 3
## 478 2 2
## 479 4 4
## 480 3 3
## 481 4 4
## 482 2 2
## 483 5 5
## 484 4 5
## 485 2 4
## 486 2 4
## 487 1 3
## 488 2 5
## 489 2 4
## 490 1 3
## 491 2 3
## 492 3 5
## 493 3 4
## 494 1 2
## 495 3 5
## 496 4 5
## 497 4 1
## 498 3 4
## 499 4 5
## 500 3 4
## 501 5 4
## 502 2 4
## 503 2 4
## 504 2 5
## 505 5 3
## 506 3 3
## 507 4 4
## 508 3 4
## 509 2 1
## 510 1 2
## 511 3 4
## 512 4 4
## 513 1 4
## 514 3 2
## 515 1 4
## 516 4 4
## 517 1 4
## 518 4 2
## 519 1 3
## 520 3 1
## 521 3 4
## 522 3 3
## 523 2 2
## 524 4 5
## 525 3 4
## 526 1 5
## 527 3 3
## 528 2 5
## 529 3 3
## 530 2 4
## 531 4 2
## 532 3 4
## 533 1 3
## 534 2 3
## 535 3 3
## 536 5 5
## 537 2 2
## 538 1 3
## 539 4 2
## 540 2 4
## 541 2 3
## 542 3 4
## 543 4 3
## 544 4 5
## 545 2 2
## 546 5 3
## 547 3 1
## 548 2 2
## 549 3 4
## 550 2 2
## 551 3 1
## 552 5 5
## 553 4 4
## 554 2 3
## 555 3 3
## 556 5 2
## 557 2 4
## 558 2 4
## 559 1 2
## 560 3 3
## 561 4 1
## 562 2 5
## 563 2 2
## 564 2 4
## 565 2 5
## 566 4 5
## 567 3 3
## 568 4 5
## 569 1 5
## 570 2 4
## 571 1 1
## 572 2 5
## 573 5 4
## 574 1 5
## 575 4 4
## 576 3 2
## 577 2 4
## 578 4 4
## 579 3 2
## 580 3 3
## 581 1 4
## 582 4 3
## 583 3 4
## 584 2 3
## 585 3 2
## 586 5 5
## 587 1 5
## 588 1 5
## 589 4 4
## 590 3 2
## 591 4 2
## 592 3 2
## 593 2 2
## 594 2 4
## 595 4 3
## 596 3 2
## 597 1 5
## 598 2 4
## 599 2 5
## 600 3 2
## 601 3 5
Renaming religiousness to rel
cheater_renamed <- cheater %>%
rename(rel = religiousness)
summary(cheater_renamed)## affairs gender age yearsmarried children
## Min. : 0.000 female:315 Min. :17.50 Min. : 0.125 no :171
## 1st Qu.: 0.000 male :286 1st Qu.:27.00 1st Qu.: 4.000 yes:430
## Median : 0.000 Median :32.00 Median : 7.000
## Mean : 1.456 Mean :32.49 Mean : 8.178
## 3rd Qu.: 0.000 3rd Qu.:37.00 3rd Qu.:15.000
## Max. :12.000 Max. :57.00 Max. :15.000
## rel education occupation rating
## Min. :1.000 Min. : 9.00 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:14.00 1st Qu.:3.000 1st Qu.:3.000
## Median :3.000 Median :16.00 Median :5.000 Median :4.000
## Mean :3.116 Mean :16.17 Mean :4.195 Mean :3.932
## 3rd Qu.:4.000 3rd Qu.:18.00 3rd Qu.:6.000 3rd Qu.:5.000
## Max. :5.000 Max. :20.00 Max. :7.000 Max. :5.000
## cheater
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2496
## 3rd Qu.:0.0000
## Max. :1.0000
band_members## # A tibble: 3 x 2
## name band
## <chr> <chr>
## 1 Mick Stones
## 2 John Beatles
## 3 Paul Beatles
band_instruments## # A tibble: 3 x 2
## name plays
## <chr> <chr>
## 1 John guitar
## 2 Paul bass
## 3 Keith guitar
inner <- inner_join(band_members , band_instruments)## Joining, by = "name"
inner## # A tibble: 2 x 3
## name band plays
## <chr> <chr> <chr>
## 1 John Beatles guitar
## 2 Paul Beatles bass
left <- left_join(band_members , band_instruments)## Joining, by = "name"
left## # A tibble: 3 x 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
semi <- semi_join(band_members , band_instruments)## Joining, by = "name"
semi## # A tibble: 2 x 2
## name band
## <chr> <chr>
## 1 John Beatles
## 2 Paul Beatles
anti <- anti_join(band_members , band_instruments)## Joining, by = "name"
anti## # A tibble: 1 x 2
## name band
## <chr> <chr>
## 1 Mick Stones
summarise_cheater <- cheater %>%
group_by(children) %>%
summarise(mean_cheater = mean(cheater) ,
mean_rel = mean(religiousness))
summarise_cheater## # A tibble: 2 x 3
## children mean_cheater mean_rel
## <fct> <dbl> <dbl>
## 1 no 0.158 2.88
## 2 yes 0.286 3.21
gathered <- summarise_cheater %>%
gather(metric , value , -children)
gathered## # A tibble: 4 x 3
## children metric value
## <fct> <chr> <dbl>
## 1 no mean_cheater 0.158
## 2 yes mean_cheater 0.286
## 3 no mean_rel 2.88
## 4 yes mean_rel 3.21
gathered## # A tibble: 4 x 3
## children metric value
## <fct> <chr> <dbl>
## 1 no mean_cheater 0.158
## 2 yes mean_cheater 0.286
## 3 no mean_rel 2.88
## 4 yes mean_rel 3.21
spreading <- gathered %>%
spread(metric , value)
spreading## # A tibble: 2 x 3
## children mean_cheater mean_rel
## * <fct> <dbl> <dbl>
## 1 no 0.158 2.88
## 2 yes 0.286 3.21