1/29/2024

What we’ll cover

Data Wrangling with tidyverse

Please load some packages

library(tidyverse)

library(AER)
# Load the Affairs data set (made available by the AER package loaded above)
# and keep a working copy under a lower-case name.
data("Affairs")

affairs <- Affairs

# Per-column overview of the working copy.
summary(affairs)
##     affairs          gender         age         yearsmarried    children 
##  Min.   : 0.000   female:315   Min.   :17.50   Min.   : 0.125   no :171  
##  1st Qu.: 0.000   male  :286   1st Qu.:27.00   1st Qu.: 4.000   yes:430  
##  Median : 0.000                Median :32.00   Median : 7.000            
##  Mean   : 1.456                Mean   :32.49   Mean   : 8.178            
##  3rd Qu.: 0.000                3rd Qu.:37.00   3rd Qu.:15.000            
##  Max.   :12.000                Max.   :57.00   Max.   :15.000            
##  religiousness     education       occupation        rating     
##  Min.   :1.000   Min.   : 9.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:14.00   1st Qu.:3.000   1st Qu.:3.000  
##  Median :3.000   Median :16.00   Median :5.000   Median :4.000  
##  Mean   :3.116   Mean   :16.17   Mean   :4.195   Mean   :3.932  
##  3rd Qu.:4.000   3rd Qu.:18.00   3rd Qu.:6.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :20.00   Max.   :7.000   Max.   :5.000

Basic wrangling verbs for today

arrange
filter
mutate
summarise
group_by
select
rename

Sorting by rating and inserting into a new object

# arrange() sorts ascending by default; the sorted copy is stored in a new
# object.
sorted_by_rating <- arrange(affairs, rating)

# First rows of the sorted result (lowest ratings first).
head(sorted_by_rating)
##      affairs gender age yearsmarried children religiousness education
## 224        0 female  27            4      yes             2        18
## 277        0 female  37           15      yes             4        14
## 491        0   male  57           15      yes             3        16
## 751        0 female  52           15      yes             5        17
## 1160       0 female  37           15      yes             2        14
## 1207       0 female  27            7      yes             2        12
##      occupation rating
## 224           6      1
## 277           3      1
## 491           6      1
## 751           1      1
## 1160          1      1
## 1207          5      1

Sorting by rating in descending order and inserting into a new object

# desc() reverses the sort order, so the highest ratings come first.
sorted_by_desc_rating <- arrange(affairs, desc(rating))

# Inspect the object just created (the descending sort), not the earlier
# ascending sorted_by_rating.
head(sorted_by_desc_rating)
##     affairs gender age yearsmarried children religiousness education occupation
## 16        0   male  57         15.0      yes             5        18          6
## 29        0 female  32          1.5       no             2        17          5
## 49        0   male  22          1.5       no             4        14          4
## 93        0 female  37         15.0      yes             1        17          5
## 115       0 female  22          1.5       no             2        16          5
## 116       0 female  27         10.0      yes             2        14          1
##     rating
## 16       5
## 29       5
## 49       5
## 93       5
## 115      5
## 116      5

Sorting by rating in descending order and introducing the pipe %>% (read it as “then”)

# Same descending sort, written with the pipe: take affairs, THEN arrange it.
sorted_by_desc_rating <- affairs %>%
  arrange(desc(rating))

# Inspect the object just created (the descending sort), not the earlier
# ascending sorted_by_rating.
head(sorted_by_desc_rating)
##     affairs gender age yearsmarried children religiousness education occupation
## 16        0   male  57         15.0      yes             5        18          6
## 29        0 female  32          1.5       no             2        17          5
## 49        0   male  22          1.5       no             4        14          4
## 93        0 female  37         15.0      yes             1        17          5
## 115       0 female  22          1.5       no             2        16          5
## 116       0 female  27         10.0      yes             2        14          1
##     rating
## 16       5
## 29       5
## 49       5
## 93       5
## 115      5
## 116      5

Time for quick exercises using the affairs data

# Reference pattern for the exercises: pipe the data into a descending sort.
sorted_by_desc_rating <- affairs %>%
  arrange(desc(rating))
  1. Create a new object sorted by affairs in descending order
  2. Create a new object sorted by affairs in descending order and then by religiousness

Filtering data - condition operators

== equal to
!= not equal to
>= greater than or equal to
<= less than or equal to
> greater than
< less than

Filtering where rating = 5

# Keep only the rows whose rating equals 5.
filtered_rating_5 <- affairs %>% filter(rating == 5)

# Number of matching rows, then a look at the first few.
nrow(filtered_rating_5)
## [1] 232
head(filtered_rating_5)
##     affairs gender age yearsmarried children religiousness education occupation
## 16        0   male  57         15.0      yes             5        18          6
## 29        0 female  32          1.5       no             2        17          5
## 49        0   male  22          1.5       no             4        14          4
## 93        0 female  37         15.0      yes             1        17          5
## 115       0 female  22          1.5       no             2        16          5
## 116       0 female  27         10.0      yes             2        14          1
##     rating
## 16       5
## 29       5
## 49       5
## 93       5
## 115      5
## 116      5

Filtering where rating = 5

# filter() keeps the rows for which the condition is TRUE (here: rating == 5).
filtered_rating_5 <- affairs %>% filter(rating == 5)

# Row count of the filtered data, followed by its first rows.
nrow(filtered_rating_5)
## [1] 232
head(filtered_rating_5)
##     affairs gender age yearsmarried children religiousness education occupation
## 16        0   male  57         15.0      yes             5        18          6
## 29        0 female  32          1.5       no             2        17          5
## 49        0   male  22          1.5       no             4        14          4
## 93        0 female  37         15.0      yes             1        17          5
## 115       0 female  22          1.5       no             2        16          5
## 116       0 female  27         10.0      yes             2        14          1
##     rating
## 16       5
## 29       5
## 49       5
## 93       5
## 115      5
## 116      5

Filtering where rating = 3 with between

# Two conditions combined with AND: rating above 2 and below 4.
filtered_rating_3 <- affairs %>%
  filter(rating > 2 & rating < 4)

# Row count of the filtered data, followed by its first rows.
nrow(filtered_rating_3)
## [1] 93
head(filtered_rating_3)
##     affairs gender age yearsmarried children religiousness education occupation
## 23        0   male  22         0.75       no             2        17          6
## 44        0 female  22         0.75       no             2        12          1
## 108       0 female  37        15.00      yes             2        18          4
## 162       0   male  42        15.00      yes             4        20          6
## 172       0 female  22         1.50       no             4        16          5
## 217       0   male  52        15.00      yes             5        18          6
##     rating
## 23       3
## 44       3
## 108      3
## 162      3
## 172      3
## 217      3

Filtering exercise using the affairs data

# Reference pattern for the exercises: several conditions, all of which
# must hold for a row to be kept.
filtered_rating_3 <- affairs %>%
  filter(rating > 2 & rating < 4)
  1. Create a new object with the data filtered where children = ‘yes’
  2. Create a new object with the data filtered where
    1. affairs > 0
    2. religiousness == 5
    3. rating == 5

Creating a new variable: how do you define a cheater?

# Add an indicator column: 1 when the row reports any affairs, 0 otherwise.
cheater <- affairs %>%
  mutate(cheater = ifelse(affairs > 0, 1, 0))

# Rows with the fewest affairs first.
cheater %>%
  arrange(affairs) %>%
  head()
##    affairs gender age yearsmarried children religiousness education occupation
## 4        0   male  37        10.00       no             3        18          7
## 5        0 female  27         4.00       no             4        14          6
## 11       0 female  32        15.00      yes             1        12          1
## 16       0   male  57        15.00      yes             5        18          6
## 23       0   male  22         0.75       no             2        17          6
## 29       0 female  32         1.50       no             2        17          5
##    rating cheater
## 4       4       0
## 5       4       0
## 11      4       0
## 16      5       0
## 23      3       0
## 29      5       0
# Rows with the most affairs first.
cheater %>%
  arrange(desc(affairs)) %>%
  head()
##     affairs gender age yearsmarried children religiousness education occupation
## 53       12 female  32         10.0      yes             3        17          5
## 122      12   male  37         15.0      yes             4        14          5
## 174      12 female  42         15.0      yes             5         9          4
## 176      12   male  37         10.0      yes             2        20          6
## 181      12 female  32         15.0      yes             3        14          1
## 252      12   male  27          1.5      yes             3        17          5
##     rating cheater
## 53       2       1
## 122      2       1
## 174      1       1
## 176      2       1
## 181      2       1
## 252      4       1

Creating multiple new variables

# One mutate() call can add several columns at once.
cheater <- affairs %>%
  mutate(
    cheater = ifelse(affairs > 0, 1, 0),
    age_when_married = age - yearsmarried
  )

# Rows with the fewest affairs first.
cheater %>%
  arrange(affairs) %>%
  head()
##    affairs gender age yearsmarried children religiousness education occupation
## 4        0   male  37        10.00       no             3        18          7
## 5        0 female  27         4.00       no             4        14          6
## 11       0 female  32        15.00      yes             1        12          1
## 16       0   male  57        15.00      yes             5        18          6
## 23       0   male  22         0.75       no             2        17          6
## 29       0 female  32         1.50       no             2        17          5
##    rating cheater age_when_married
## 4       4       0            27.00
## 5       4       0            23.00
## 11      4       0            17.00
## 16      5       0            42.00
## 23      3       0            21.25
## 29      5       0            30.50
# Rows with the most affairs first, now including the new columns.
cheater %>%
  arrange(desc(affairs)) %>%
  head()
##     affairs gender age yearsmarried children religiousness education occupation
## 53       12 female  32         10.0      yes             3        17          5
## 122      12   male  37         15.0      yes             4        14          5
## 174      12 female  42         15.0      yes             5         9          4
## 176      12   male  37         10.0      yes             2        20          6
## 181      12 female  32         15.0      yes             3        14          1
## 252      12   male  27          1.5      yes             3        17          5
##     rating cheater age_when_married
## 53       2       1             22.0
## 122      2       1             22.0
## 174      1       1             27.0
## 176      2       1             27.0
## 181      2       1             17.0
## 252      4       1             25.5
# Distribution of the 0/1 indicator; its mean is the share of rows flagged 1.
summary(cheater[["cheater"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2496  0.0000  1.0000

By the way, I think the age when married is not correct

# Sanity check on the derived column (age - yearsmarried).
summary(cheater[["age_when_married"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.50   20.50   22.00   24.31   27.00   45.00

Likely the variable age is the age when married rather than the current age.

Hope so…

Exercises using the affairs data

# Reference pattern for the exercises: derive a new column with mutate().
cheater <- affairs %>%
  mutate(cheater = ifelse(affairs > 0, 1, 0))

Create a new object with two new variables:
1. cheater defined as having had at least 6 affairs
2. religiousness into rating (religiousness/rating)

Let’s do multiple verbs, taking advantage of the word %>%

Using the data affairs
THEN
create a new variable cheater
THEN
filtering for religiousness = 5 and cheater = 1

# Chain verbs with the pipe: create the indicator, THEN keep only rows with
# religiousness 5 whose indicator is non-zero.
cheater_rel5 <- affairs %>%
  mutate(cheater = ifelse(affairs > 0, 1, 0)) %>%
  filter(religiousness == 5 & cheater != 0)

# Size and per-column overview of the resulting subset.
nrow(cheater_rel5)
## [1] 13
summary(cheater_rel5)
##     affairs          gender       age         yearsmarried   children
##  Min.   : 1.000   female:7   Min.   :17.50   Min.   : 0.75   no : 1  
##  1st Qu.: 1.000   male  :6   1st Qu.:27.00   1st Qu.:10.00   yes:12  
##  Median : 3.000              Median :37.00   Median :15.00           
##  Mean   : 4.769              Mean   :36.65   Mean   :11.87           
##  3rd Qu.: 7.000              3rd Qu.:47.00   3rd Qu.:15.00           
##  Max.   :12.000              Max.   :57.00   Max.   :15.00           
##  religiousness   education       occupation        rating         cheater 
##  Min.   :5     Min.   : 9.00   Min.   :1.000   Min.   :1.000   Min.   :1  
##  1st Qu.:5     1st Qu.:14.00   1st Qu.:4.000   1st Qu.:3.000   1st Qu.:1  
##  Median :5     Median :16.00   Median :5.000   Median :4.000   Median :1  
##  Mean   :5     Mean   :16.15   Mean   :4.308   Mean   :3.692   Mean   :1  
##  3rd Qu.:5     3rd Qu.:18.00   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:1  
##  Max.   :5     Max.   :20.00   Max.   :6.000   Max.   :5.000   Max.   :1

Summarizing data

You can create a new object with the mean cheater using native R

# Base R: extract the column as a vector and average it; the result is a
# plain number, as str() shows.
mean_cheater <- mean(cheater[["cheater"]])

mean_cheater
## [1] 0.249584
str(mean_cheater)
##  num 0.25

In dplyr it would be:

# dplyr: inside summarise(), `cheater` refers to the column of the piped data
# frame. The result is a one-row data frame, not a bare number.
mean_cheater <- cheater %>%
  summarise(mean_cheater = mean(cheater))

mean_cheater
##   mean_cheater
## 1     0.249584
str(mean_cheater)
## 'data.frame':    1 obs. of  1 variable:
##  $ mean_cheater: num 0.25

We can summarise multiple variables in one statement

# Each named argument to summarise() becomes one column of the one-row result.
summarise_cheater <- cheater %>%
  summarise(
    mean_cheater = mean(cheater),
    mean_rel = mean(religiousness)
  )

summarise_cheater
##   mean_cheater mean_rel
## 1     0.249584 3.116473
str(summarise_cheater)
## 'data.frame':    1 obs. of  2 variables:
##  $ mean_cheater: num 0.25
##  $ mean_rel    : num 3.12

Exercise: summarise

# Reference pattern for the exercise: several summaries in one summarise().
summarise_cheater <- cheater %>%
  summarise(
    mean_cheater = mean(cheater),
    mean_rel = mean(religiousness)
  )

Create a data frame using the cheater data that calculates:
1. The sum of cheater
2. The mean of rating

Now let’s summarise by group

Cheater proportion by religiousness

# group_by() + summarise(): one output row per religiousness level, holding
# the mean of the 0/1 indicator (i.e. the proportion flagged) in that group.
cheater_by_rel <- cheater %>%
  group_by(religiousness) %>%
  summarise(proportion_cheater = mean(cheater))

cheater_by_rel
## # A tibble: 5 × 2
##   religiousness proportion_cheater
##           <int>              <dbl>
## 1             1              0.417
## 2             2              0.25 
## 3             3              0.333
## 4             4              0.174
## 5             5              0.186

group_by + mutate = ?

Cheater proportion by religiousness

# group_by() + mutate(): every original row is kept and the group-level mean
# is attached to each row of its group.
cheater_by_rel <- cheater %>%
  group_by(religiousness) %>%
  mutate(proportion_cheater = mean(cheater)) %>%
  # The first three columns, spelled out by name, plus the columns of interest.
  select(affairs, gender, age, religiousness, cheater, proportion_cheater) %>%
  # Drop the grouping so later verbs operate on the whole table again.
  ungroup()

head(cheater_by_rel)
## # A tibble: 6 × 6
##   affairs gender   age religiousness cheater proportion_cheater
##     <dbl> <fct>  <dbl>         <int>   <dbl>              <dbl>
## 1       0 male      37             3       0              0.333
## 2       0 female    27             4       0              0.174
## 3       0 female    32             1       0              0.417
## 4       0 male      57             5       0              0.186
## 5       0 male      22             2       0              0.25 
## 6       0 female    32             2       0              0.25

This has many uses

The verb select is extremely powerful

Check out the documentation to see the multiple ways you can select variables

# Open the help page listing the selection helpers.
?dplyr::select

# starts_with() picks every column whose name begins with the given prefix.
# Wrapped in head() so only the first rows are printed instead of all 601.
cheater %>%
  select(starts_with("r")) %>%
  head()
##    religiousness rating
## 4              3      4
## 5              4      4
## 11             1      4
## 16             5      5
## 23             2      3
## 29             2      5

Exercises: group_by

  1. Create a data frame grouped by children (yes/no), summarizing:
    • mean religiousness
    • mean rating
    • mean cheater
  2. Create the same table using the mutate statement

By now, renaming is all but covered

Renaming religiousness to rel

# rename() takes new_name = old_name; all other columns are left untouched.
cheater_renamed <- cheater %>% rename(rel = religiousness)

# The summary now lists `rel` where `religiousness` used to be.
summary(cheater_renamed)
##     affairs          gender         age         yearsmarried    children 
##  Min.   : 0.000   female:315   Min.   :17.50   Min.   : 0.125   no :171  
##  1st Qu.: 0.000   male  :286   1st Qu.:27.00   1st Qu.: 4.000   yes:430  
##  Median : 0.000                Median :32.00   Median : 7.000            
##  Mean   : 1.456                Mean   :32.49   Mean   : 8.178            
##  3rd Qu.: 0.000                3rd Qu.:37.00   3rd Qu.:15.000            
##  Max.   :12.000                Max.   :57.00   Max.   :15.000            
##       rel          education       occupation        rating     
##  Min.   :1.000   Min.   : 9.00   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:14.00   1st Qu.:3.000   1st Qu.:3.000  
##  Median :3.000   Median :16.00   Median :5.000   Median :4.000  
##  Mean   :3.116   Mean   :16.17   Mean   :4.195   Mean   :3.932  
##  3rd Qu.:4.000   3rd Qu.:18.00   3rd Qu.:6.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :20.00   Max.   :7.000   Max.   :5.000  
##     cheater      
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.2496  
##  3rd Qu.:0.0000  
##  Max.   :1.0000

Next time…

  • Joins