Module 8 Exercise: dplyr

Author

Bryce Nelson

Setup and making a factor

# Point the session at the Module8 folder so the relative CSV path below resolves.
# NOTE(review): this absolute path is machine-specific -- confirm before running elsewhere.
setwd("/Users/u6022167/Desktop/GEOG5680/Module8")
getwd()

# install.packages("dplyr")
library(dplyr)

# Load the data and inspect its structure.
theoph <- read.csv("theoph.csv")
str(theoph)

# Treat the subject ID as a categorical variable rather than a number.
theoph[["Subject"]] <- factor(theoph[["Subject"]])

Select Subject, Time, and Concentration

Using select() to create a new data frame containing only the subject ID, time, and concentration variables.

# Keep only the subject ID, time, and concentration columns.
TheophTimeConc <- select(theoph, Subject, Time, conc)

TheophTimeConc
    Subject  Time  conc
1         1  0.00  0.74
2         1  0.25  2.84
3         1  0.57  6.57
4         1  1.12 10.50
5         1  2.02  9.66
6         1  3.82  8.58
7         1  5.10  8.36
8         1  7.03  7.47
9         1  9.05  6.89
10        1 12.12  5.94
11        1 24.37  3.28
12        2  0.00  0.00
13        2  0.27  1.72
14        2  0.52  7.91
15        2  1.00  8.31
16        2  1.92  8.33
17        2  3.50  6.85
18        2  5.02  6.08
19        2  7.03  5.40
20        2  9.00  4.55
21        2 12.00  3.01
22        2 24.30  0.90
23        3  0.00  0.00
24        3  0.27  4.40
25        3  0.58  6.90
26        3  1.02  8.20
27        3  2.02  7.80
28        3  3.62  7.50
29        3  5.08  6.20
30        3  7.07  5.30
31        3  9.00  4.90
32        3 12.15  3.70
33        3 24.17  1.05
34        4  0.00  0.00
35        4  0.35  1.89
36        4  0.60  4.60
37        4  1.07  8.60
38        4  2.13  8.38
39        4  3.50  7.54
40        4  5.02  6.88
41        4  7.02  5.78
42        4  9.02  5.33
43        4 11.98  4.19
44        4 24.65  1.15
45        5  0.00  0.00
46        5  0.30  2.02
47        5  0.52  5.63
48        5  1.00 11.40
49        5  2.02  9.33
50        5  3.50  8.74
51        5  5.02  7.56
52        5  7.02  7.09
53        5  9.10  5.90
54        5 12.00  4.37
55        5 24.35  1.57
56        6  0.00  0.00
57        6  0.27  1.29
58        6  0.58  3.08
59        6  1.15  6.44
60        6  2.03  6.32
61        6  3.57  5.53
62        6  5.00  4.94
63        6  7.00  4.02
64        6  9.22  3.46
65        6 12.10  2.78
66        6 23.85  0.92
67        7  0.00  0.15
68        7  0.25  0.85
69        7  0.50  2.35
70        7  1.02  5.02
71        7  2.02  6.58
72        7  3.48  7.09
73        7  5.00  6.66
74        7  6.98  5.25
75        7  9.00  4.39
76        7 12.05  3.53
77        7 24.22  1.15
78        8  0.00  0.00
79        8  0.25  3.05
80        8  0.52  3.05
81        8  0.98  7.31
82        8  2.02  7.56
83        8  3.53  6.59
84        8  5.05  5.88
85        8  7.15  4.73
86        8  9.07  4.57
87        8 12.10  3.00
88        8 24.12  1.25
89        9  0.00  0.00
90        9  0.30  7.37
91        9  0.63  9.03
92        9  1.05  7.14
93        9  2.02  6.33
94        9  3.53  5.66
95        9  5.02  5.67
96        9  7.17  4.24
97        9  8.80  4.11
98        9 11.60  3.16
99        9 24.43  1.12
100      10  0.00  0.24
101      10  0.37  2.89
102      10  0.77  5.22
103      10  1.02  6.41
104      10  2.05  7.83
105      10  3.55 10.21
106      10  5.05  9.18
107      10  7.08  8.02
108      10  9.38  7.14
109      10 12.10  5.68
110      10 23.70  2.42
111      11  0.00  0.00
112      11  0.25  4.86
113      11  0.50  7.24
114      11  0.98  8.00
115      11  1.98  6.81
116      11  3.60  5.87
117      11  5.02  5.22
118      11  7.03  4.45
119      11  9.03  3.62
120      11 12.12  2.69
121      11 24.08  0.86
122      12  0.00  0.00
123      12  0.25  1.25
124      12  0.50  3.96
125      12  1.00  7.82
126      12  2.00  9.72
127      12  3.52  9.75
128      12  5.07  8.57
129      12  7.07  6.59
130      12  9.03  6.11
131      12 12.05  4.57
132      12 24.15  1.17

Subject Weight and Dose

Using select() and distinct() to create a data frame containing a single row for each subject with their weight and dose.

# Reduce to the per-subject columns, then collapse the repeated rows so each
# subject appears once with its weight and dose.
SubjDose <- distinct(
  select(theoph, Subject, Wt, Dose)
)

SubjDose
   Subject   Wt Dose
1        1 79.6 4.02
2        2 72.4 4.40
3        3 70.5 4.53
4        4 72.7 4.40
5        5 54.6 5.86
6        6 80.0 4.00
7        7 64.6 4.95
8        8 70.5 4.53
9        9 86.4 3.10
10      10 58.2 5.50
11      11 65.0 4.92
12      12 60.5 5.30

First Subject

Using filter() to create a data frame containing only the first subject.

# Rows belonging to the first subject only.
Subj1 <- filter(theoph, Subject == 1)

Subj1
   Subject   Wt Dose  Time  conc
1        1 79.6 4.02  0.00  0.74
2        1 79.6 4.02  0.25  2.84
3        1 79.6 4.02  0.57  6.57
4        1 79.6 4.02  1.12 10.50
5        1 79.6 4.02  2.02  9.66
6        1 79.6 4.02  3.82  8.58
7        1 79.6 4.02  5.10  8.36
8        1 79.6 4.02  7.03  7.47
9        1 79.6 4.02  9.05  6.89
10       1 79.6 4.02 12.12  5.94
11       1 79.6 4.02 24.37  3.28

First Four Subjects

Using filter() to create a data frame containing only the first four subjects.

I initially tried:

# First attempt (did not work): Subject is a factor at this point, and `<=`
# is not meaningful for unordered factors.
Subj1to4 = theoph |>
  filter(Subject <= 4)

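This did not work because Subject had been converted to a factor: relational operators such as <= are not meaningful for unordered factors, so the comparison returns NA (with a warning) and filter() keeps no rows. I instead used %in%, which matches the factor's labels against the values 1 through 4.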

# Membership test instead of an ordering comparison, because Subject is a factor.
Subj1to4 <- filter(theoph, Subject %in% 1:4)

Subj1to4
   Subject   Wt Dose  Time  conc
1        1 79.6 4.02  0.00  0.74
2        1 79.6 4.02  0.25  2.84
3        1 79.6 4.02  0.57  6.57
4        1 79.6 4.02  1.12 10.50
5        1 79.6 4.02  2.02  9.66
6        1 79.6 4.02  3.82  8.58
7        1 79.6 4.02  5.10  8.36
8        1 79.6 4.02  7.03  7.47
9        1 79.6 4.02  9.05  6.89
10       1 79.6 4.02 12.12  5.94
11       1 79.6 4.02 24.37  3.28
12       2 72.4 4.40  0.00  0.00
13       2 72.4 4.40  0.27  1.72
14       2 72.4 4.40  0.52  7.91
15       2 72.4 4.40  1.00  8.31
16       2 72.4 4.40  1.92  8.33
17       2 72.4 4.40  3.50  6.85
18       2 72.4 4.40  5.02  6.08
19       2 72.4 4.40  7.03  5.40
20       2 72.4 4.40  9.00  4.55
21       2 72.4 4.40 12.00  3.01
22       2 72.4 4.40 24.30  0.90
23       3 70.5 4.53  0.00  0.00
24       3 70.5 4.53  0.27  4.40
25       3 70.5 4.53  0.58  6.90
26       3 70.5 4.53  1.02  8.20
27       3 70.5 4.53  2.02  7.80
28       3 70.5 4.53  3.62  7.50
29       3 70.5 4.53  5.08  6.20
30       3 70.5 4.53  7.07  5.30
31       3 70.5 4.53  9.00  4.90
32       3 70.5 4.53 12.15  3.70
33       3 70.5 4.53 24.17  1.05
34       4 72.7 4.40  0.00  0.00
35       4 72.7 4.40  0.35  1.89
36       4 72.7 4.40  0.60  4.60
37       4 72.7 4.40  1.07  8.60
38       4 72.7 4.40  2.13  8.38
39       4 72.7 4.40  3.50  7.54
40       4 72.7 4.40  5.02  6.88
41       4 72.7 4.40  7.02  5.78
42       4 72.7 4.40  9.02  5.33
43       4 72.7 4.40 11.98  4.19
44       4 72.7 4.40 24.65  1.15

Average Concentration by Subject

Using group_by() and summarise() to calculate the average concentration for each subject across all sampling times.

# Mean concentration per subject, averaged over all of that subject's rows.
avgConc <- summarise(
  group_by(theoph, Subject),
  avgConc = mean(conc)
)

avgConc
# A tibble: 12 × 2
   Subject avgConc
   <fct>     <dbl>
 1 1          6.44
 2 2          4.82
 3 3          5.09
 4 4          4.94
 5 5          5.78
 6 6          3.53
 7 7          3.91
 8 8          4.27
 9 9          4.89
10 10         5.93
11 11         4.51
12 12         5.41

Concentration Through Time for Subject 1

Using filter() and ggplot() to create a line plot of concentration through time for the first subject.

library(ggplot2)

# Line plot of concentration against time, restricted to subject 1.
ggplot(
  filter(theoph, Subject == 1),
  aes(x = Time, y = conc)
) +
  geom_line()

Concentration Through Time by Subject

Using select() and ggplot() to create a line plot with separate colors for each subject.

# One line per subject, distinguished by colour (Subject is a factor, so each
# level gets its own discrete colour).
theoph |>
  select(Subject, Time, conc) |>
  ggplot(aes(x = Time, y = conc, colour = Subject)) +
  geom_line()

Faceted Concentration Through Time Plots (using facet_wrap() from Module 7)

Using select() and ggplot() to create a faceted line plot for each subject.

# One panel per subject, with the title and axis labels set in a single labs() call.
theoph |>
  select(Subject, Time, conc) |>
  ggplot(aes(x = Time, y = conc)) +
  geom_line() +
  facet_wrap(~Subject) +
  labs(
    title = "Conc and Time by Subject",
    x = "Time (hr)",
    y = "Concentration (mg/l)"
  )