sjmisc learning
sjmisc complements dplyr and helps with data transformation tasks and recoding variables. It works seamlessly with dplyr and pipes, and all functions are designed to support labelled data.
Design Philosophy
# A tibble: 6 x 26
c12hour e15relat e16sex e17age e42dep c82cop1 c83cop2 c84cop3 c85cop4 c86cop5
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 16 2 2 83 3 3 2 2 2 1
2 148 2 2 88 3 3 3 3 3 4
3 70 1 2 82 3 2 2 1 4 1
4 168 1 2 67 4 4 1 3 1 1
5 168 2 2 84 4 3 2 1 2 2
6 16 2 2 85 4 2 2 3 3 3
# ... with 16 more variables: c87cop6 <dbl>, c88cop7 <dbl>, c89cop8 <dbl>,
# c90cop9 <dbl>, c160age <dbl>, c161sex <dbl>, c172code <dbl>,
# c175empl <dbl>, barthtot <dbl>, neg_c_7 <dbl>, pos_v_4 <dbl>, quol_5 <dbl>,
# resttotn <dbl>, tot_sc_e <dbl>, n4pstu <dbl>, nur_pst <dbl>
[1] 4 4 1 1 2 1 4 2 2 4 4 3 3 3 4 4 4 1 2 1 1 2 2 4 2 1 2 2 4 6 8 2
[1] 2 2 1 1 1 1 2 1 1 2 2 2 2 2 2 2 2 1 1 1 1 1 1 2 1 1 1 1 2 3 3 1
mpg cyl disp hp drat wt qsec vs am gear carb carb_r
1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 2
2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 2
3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 1
4 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 1
5 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 1
6 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 1
[ reached 'max' / getOption("max.print") -- omitted 26 rows ]
The ... (ellipsis) argument
.
3 4 5
15 12 5
.
1 2 3 4 6 8
7 10 3 10 1 1
mpg cyl disp hp drat wt qsec vs am gear carb gear_r carb_r
1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 2 2
2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 2 2
3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 2 1
4 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 1 1
5 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 1 1
[ reached 'max' / getOption("max.print") -- omitted 27 rows ]
mpg cyl disp hp drat wt qsec vs am gear carb gear_r carb_r
1 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 2 2
2 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 2 2
3 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 2 1
4 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 1 1
5 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 1 1
[ reached 'max' / getOption("max.print") -- omitted 27 rows ]
Descriptives and Summaries
elder's dependency (e42dep) <numeric>
# grouped by: male, low level of education
# total N=80 valid N=80 mean=3.06 sd=0.92
val label frq raw.prc valid.prc cum.prc
1 independent 5 6.25 6.25 6.25
2 slightly dependent 16 20.00 20.00 26.25
3 moderately dependent 28 35.00 35.00 61.25
4 severely dependent 31 38.75 38.75 100.00
NA <NA> 0 0.00 NA NA
elder's dependency (e42dep) <numeric>
# grouped by: male, intermediate level of education
# total N=156 valid N=156 mean=2.83 sd=0.94
val label frq raw.prc valid.prc cum.prc
1 independent 15 9.62 9.62 9.62
2 slightly dependent 39 25.00 25.00 34.62
3 moderately dependent 59 37.82 37.82 72.44
4 severely dependent 43 27.56 27.56 100.00
NA <NA> 0 0.00 NA NA
elder's dependency (e42dep) <numeric>
# grouped by: male, high level of education
# total N=43 valid N=43 mean=2.91 sd=0.81
val label frq raw.prc valid.prc cum.prc
1 independent 1 2.33 2.33 2.33
2 slightly dependent 13 30.23 30.23 32.56
3 moderately dependent 18 41.86 41.86 74.42
4 severely dependent 11 25.58 25.58 100.00
NA <NA> 0 0.00 NA NA
elder's dependency (e42dep) <numeric>
# grouped by: female, low level of education
# total N=99 valid N=99 mean=2.95 sd=0.94
val label frq raw.prc valid.prc cum.prc
1 independent 7 7.07 7.07 7.07
2 slightly dependent 25 25.25 25.25 32.32
3 moderately dependent 33 33.33 33.33 65.66
4 severely dependent 34 34.34 34.34 100.00
NA <NA> 0 0.00 NA NA
elder's dependency (e42dep) <numeric>
# grouped by: female, intermediate level of education
# total N=350 valid N=350 mean=2.90 sd=0.98
val label frq raw.prc valid.prc cum.prc
1 independent 30 8.57 8.57 8.57
2 slightly dependent 96 27.43 27.43 36.00
3 moderately dependent 104 29.71 29.71 65.71
4 severely dependent 120 34.29 34.29 100.00
NA <NA> 0 0.00 NA NA
elder's dependency (e42dep) <numeric>
# grouped by: female, high level of education
# total N=113 valid N=113 mean=3.04 sd=0.85
val label frq raw.prc valid.prc cum.prc
1 independent 4 3.54 3.54 3.54
2 slightly dependent 26 23.01 23.01 26.55
3 moderately dependent 44 38.94 38.94 65.49
4 severely dependent 39 34.51 34.51 100.00
NA <NA> 0 0.00 NA NA
gear <numeric>
# grouped by: 4
# total N=11 valid N=11 mean=4.09 sd=0.54
val frq raw.prc valid.prc cum.prc
3 1 9.09 9.09 9.09
4 8 72.73 72.73 81.82
5 2 18.18 18.18 100.00
NA 0 0.00 NA NA
gear <numeric>
# grouped by: 6
# total N=7 valid N=7 mean=3.86 sd=0.69
val frq raw.prc valid.prc cum.prc
3 2 28.57 28.57 28.57
4 4 57.14 57.14 85.71
5 1 14.29 14.29 100.00
NA 0 0.00 NA NA
gear <numeric>
# grouped by: 8
# total N=14 valid N=14 mean=3.29 sd=0.73
val frq raw.prc valid.prc cum.prc
3 12 85.71 85.71 85.71
5 2 14.29 14.29 100.00
NA 0 0.00 NA NA
elder's dependency (e42dep) <numeric>
# total N=908 valid N=901 mean=2.94 sd=0.94
val label frq raw.prc valid.prc cum.prc
1 independent 66 7.27 7.33 7.33
2 slightly dependent 225 24.78 24.97 32.30
3 moderately dependent 306 33.70 33.96 66.26
4 severely dependent 304 33.48 33.74 100.00
NA <NA> 7 0.77 NA NA
carer's gender (c161sex) <numeric>
# total N=908 valid N=901 mean=1.76 sd=0.43
val label frq raw.prc valid.prc cum.prc
1 Male 215 23.68 23.86 23.86
2 Female 686 75.55 76.14 100.00
NA <NA> 7 0.77 NA NA
Descriptive Summary
## Basic descriptive statistics
var type label n NA.prc mean sd se md trimmed
mpg numeric mpg 32 0 20.09 6.03 1.07 19.20 19.70
cyl numeric cyl 32 0 6.19 1.79 0.32 6.00 6.23
disp numeric disp 32 0 230.72 123.94 21.91 196.30 222.52
hp numeric hp 32 0 146.69 68.56 12.12 123.00 141.19
drat numeric drat 32 0 3.60 0.53 0.09 3.70 3.58
wt numeric wt 32 0 3.22 0.98 0.17 3.33 3.15
range skew
23.5 (10.4-33.9) 0.67
4 (4-8) -0.19
400.9 (71.1-472) 0.42
283 (52-335) 0.80
2.17 (2.76-4.93) 0.29
3.91 (1.51-5.42) 0.47
[ reached 'max' / getOption("max.print") -- omitted 5 rows ]
## Basic descriptive statistics
var type label n NA.prc mean sd se md trimmed
c82cop1 numeric do you feel you cope... 901 0.77 3.12 0.58 0.02 3 3.15
c83cop2 numeric do you find... 902 0.66 2.02 0.72 0.02 2 1.98
c84cop3 numeric does caregiving... 902 0.66 1.63 0.87 0.03 1 1.47
c85cop4 numeric does caregiving have... 898 1.10 1.77 0.87 0.03 2 1.63
c86cop5 numeric does caregiving... 902 0.66 1.39 0.67 0.02 1 1.26
c87cop6 numeric does caregiving... 900 0.88 1.29 0.64 0.02 1 1.13
range skew
3 (1-4) -0.12
3 (1-4) 0.65
3 (1-4) 1.31
3 (1-4) 1.06
3 (1-4) 1.77
3 (1-4) 2.43
[ reached 'max' / getOption("max.print") -- omitted 3 rows ]
Finding Variables in a Data Frame
c82cop1 c83cop2 c84cop3 c85cop4 c86cop5 c87cop6 c88cop7 c89cop8 c90cop9
1 3 2 2 2 1 1 2 3 3
2 3 3 3 3 4 1 3 2 2
3 2 2 1 4 1 1 1 4 3
4 4 1 3 1 1 1 1 2 4
5 3 2 1 2 2 2 1 4 4
6 2 2 3 3 3 2 2 1 1
7 4 2 4 1 1 2 4 1 4
8 3 2 2 1 1 1 2 3 3
[ reached 'max' / getOption("max.print") -- omitted 900 rows ]
col.nr var.name var.label
1 17 c172code carer's level of education
Summarise Variables and Cases
[1] 908 26
rowsums
1 19
2 24
3 19
4 18
5 21
6 19
7 23
8 18
9 20
10 15
11 29
12 22
13 22
14 25
15 18
16 20
17 20
18 19
19 19
20 22
21 17
22 15
23 15
24 22
25 18
26 26
27 18
28 16
29 19
30 16
31 29
32 16
33 17
34 24
35 18
36 19
37 20
38 18
39 23
40 21
41 23
42 21
43 21
44 20
45 23
46 20
47 21
48 21
49 22
50 21
51 23
52 15
53 16
54 19
55 18
56 19
57 26
58 23
59 21
60 25
61 20
62 23
63 17
64 24
65 15
66 20
67 17
68 19
69 19
70 21
71 18
72 21
73 19
74 20
75 20
[ reached 'max' / getOption("max.print") -- omitted 833 rows ]
rowsums
1 19
2 24
3 19
4 18
5 21
6 19
7 23
8 18
9 20
10 15
11 29
12 22
13 22
14 25
15 18
16 20
17 20
18 19
19 19
20 22
21 17
22 15
23 15
24 22
25 18
26 26
27 18
28 16
29 19
30 16
31 29
32 16
33 17
34 24
35 18
36 19
37 20
38 18
39 23
40 21
41 23
42 21
43 21
44 20
45 23
46 20
47 21
48 21
49 22
50 21
51 23
52 15
53 16
54 19
55 18
56 19
57 26
58 23
59 21
60 25
61 20
62 23
63 17
64 24
65 15
66 20
67 17
68 19
69 19
70 21
71 18
72 21
73 19
74 20
75 20
[ reached 'max' / getOption("max.print") -- omitted 833 rows ]
dat <- data.frame(
c1 = c(1,2,NA,4),
c2 = c(NA,2,NA,5),
c3 = c(NA,4,NA,NA),
c4 = c(2,3,7,8),
c5 = c(1,7,5,3)
)
dat
c1 c2 c3 c4 c5
1 1 NA NA 2 1
2 2 2 4 3 7
3 NA NA NA 7 5
4 4 5 NA 8 3
c1 c2 c3 c4 c5 rowmeans
1 1 NA NA 2 1 NA
2 2 2 4 3 7 3.6
3 NA NA NA 7 5 NA
4 4 5 NA 8 3 NA
c1 c2 c3 c4 c5 rowsums
1 1 NA NA 2 1 NA
2 2 2 4 3 7 18
3 NA NA NA 7 5 NA
4 4 5 NA 8 3 20
c1 c2 c3 c4 c5 rowmeans
1 1 NA NA 2 1 NA
2 2 2 4 3 7 2.75
3 NA NA NA 7 5 NA
4 4 5 NA 8 3 NA
c1 c2 c3 c4 c5 rowmeans
1 1 NA NA 2 1 1.500000
2 2 2 4 3 7 2.750000
3 NA NA NA 7 5 NA
4 4 5 NA 8 3 5.666667
c1 c2 c3 c4 c5 rowsums
1 1 NA NA 2 1 3
2 2 2 4 3 7 11
3 NA NA NA 7 5 NA
4 4 5 NA 8 3 17
[1] 3.857143
# create sum-score of COPE-Index, and append to data
efc %>%
select(c82cop1:c90cop9) %>%
row_sums(n = 1)
c82cop1 c83cop2 c84cop3 c85cop4 c86cop5 c87cop6 c88cop7 c89cop8 c90cop9
1 3 2 2 2 1 1 2 3 3
2 3 3 3 3 4 1 3 2 2
3 2 2 1 4 1 1 1 4 3
4 4 1 3 1 1 1 1 2 4
5 3 2 1 2 2 2 1 4 4
6 2 2 3 3 3 2 2 1 1
7 4 2 4 1 1 2 4 1 4
rowsums
1 19
2 24
3 19
4 18
5 21
6 19
7 23
[ reached 'max' / getOption("max.print") -- omitted 901 rows ]
# if data frame has only one column, this column is returned
row_sums(dat[, 1, drop = FALSE], n = 0)
rowsums
1 1
2 2
3 NA
4 4
Use with %>% and dplyr
gear carb gear_r carb_r
1 4 4 2 2
2 4 4 2 2
3 4 1 2 1
4 3 1 1 1
5 3 2 1 1
6 3 1 1 1
7 3 4 1 2
8 4 2 2 1
9 4 2 2 1
10 4 4 2 2
11 4 4 2 2
12 3 3 1 1
13 3 3 1 1
14 3 3 1 1
15 3 4 1 2
16 3 4 1 2
17 3 4 1 2
18 4 1 2 1
[ reached 'max' / getOption("max.print") -- omitted 14 rows ]
mtcars %>%
select(gear,carb) %>%
mutate(carb2 = rec(carb,rec = "min:2 = 0;3:max = 1"),
gear2 = rec(gear,rec = "3 = 1;4:max = 2"))
gear carb carb2 gear2
1 4 4 1 2
2 4 4 1 2
3 4 1 0 2
4 3 1 0 1
5 3 2 0 1
6 3 1 0 1
7 3 4 1 1
8 4 2 0 2
9 4 2 0 2
10 4 4 1 2
11 4 4 1 2
12 3 3 1 1
13 3 3 1 1
14 3 3 1 1
15 3 4 1 1
16 3 4 1 1
17 3 4 1 1
18 4 1 0 2
[ reached 'max' / getOption("max.print") -- omitted 14 rows ]