# Module 8 Exercise: dplyr

# NOTE(review): this absolute, user-specific path only exists on the original
# author's machine; running from a project root would make the script portable.
setwd("/Users/u6022167/Desktop/GEOG5680/Module8")
getwd()

# install.packages("dplyr")
library(dplyr)

# Read the data set and inspect its structure.
theoph <- read.csv("theoph.csv")
str(theoph)

# Store the subject ID as a factor rather than a number.
theoph$Subject <- factor(theoph$Subject)
# Setup and making a factor ----
# Select Subject, Time, and Concentration ----
# Use select() to create a new data frame containing only the subject ID,
# time, and concentration variables.
# Keep only the subject ID, time, and concentration columns.
TheophTimeConc <- theoph |>
  select(Subject, Time, conc)

# Print the result; the header below belongs to the pasted console output.
TheophTimeConc
#   Subject Time conc
1 1 0.00 0.74
2 1 0.25 2.84
3 1 0.57 6.57
4 1 1.12 10.50
5 1 2.02 9.66
6 1 3.82 8.58
7 1 5.10 8.36
8 1 7.03 7.47
9 1 9.05 6.89
10 1 12.12 5.94
11 1 24.37 3.28
12 2 0.00 0.00
13 2 0.27 1.72
14 2 0.52 7.91
15 2 1.00 8.31
16 2 1.92 8.33
17 2 3.50 6.85
18 2 5.02 6.08
19 2 7.03 5.40
20 2 9.00 4.55
21 2 12.00 3.01
22 2 24.30 0.90
23 3 0.00 0.00
24 3 0.27 4.40
25 3 0.58 6.90
26 3 1.02 8.20
27 3 2.02 7.80
28 3 3.62 7.50
29 3 5.08 6.20
30 3 7.07 5.30
31 3 9.00 4.90
32 3 12.15 3.70
33 3 24.17 1.05
34 4 0.00 0.00
35 4 0.35 1.89
36 4 0.60 4.60
37 4 1.07 8.60
38 4 2.13 8.38
39 4 3.50 7.54
40 4 5.02 6.88
41 4 7.02 5.78
42 4 9.02 5.33
43 4 11.98 4.19
44 4 24.65 1.15
45 5 0.00 0.00
46 5 0.30 2.02
47 5 0.52 5.63
48 5 1.00 11.40
49 5 2.02 9.33
50 5 3.50 8.74
51 5 5.02 7.56
52 5 7.02 7.09
53 5 9.10 5.90
54 5 12.00 4.37
55 5 24.35 1.57
56 6 0.00 0.00
57 6 0.27 1.29
58 6 0.58 3.08
59 6 1.15 6.44
60 6 2.03 6.32
61 6 3.57 5.53
62 6 5.00 4.94
63 6 7.00 4.02
64 6 9.22 3.46
65 6 12.10 2.78
66 6 23.85 0.92
67 7 0.00 0.15
68 7 0.25 0.85
69 7 0.50 2.35
70 7 1.02 5.02
71 7 2.02 6.58
72 7 3.48 7.09
73 7 5.00 6.66
74 7 6.98 5.25
75 7 9.00 4.39
76 7 12.05 3.53
77 7 24.22 1.15
78 8 0.00 0.00
79 8 0.25 3.05
80 8 0.52 3.05
81 8 0.98 7.31
82 8 2.02 7.56
83 8 3.53 6.59
84 8 5.05 5.88
85 8 7.15 4.73
86 8 9.07 4.57
87 8 12.10 3.00
88 8 24.12 1.25
89 9 0.00 0.00
90 9 0.30 7.37
91 9 0.63 9.03
92 9 1.05 7.14
93 9 2.02 6.33
94 9 3.53 5.66
95 9 5.02 5.67
96 9 7.17 4.24
97 9 8.80 4.11
98 9 11.60 3.16
99 9 24.43 1.12
100 10 0.00 0.24
101 10 0.37 2.89
102 10 0.77 5.22
103 10 1.02 6.41
104 10 2.05 7.83
105 10 3.55 10.21
106 10 5.05 9.18
107 10 7.08 8.02
108 10 9.38 7.14
109 10 12.10 5.68
110 10 23.70 2.42
111 11 0.00 0.00
112 11 0.25 4.86
113 11 0.50 7.24
114 11 0.98 8.00
115 11 1.98 6.81
116 11 3.60 5.87
117 11 5.02 5.22
118 11 7.03 4.45
119 11 9.03 3.62
120 11 12.12 2.69
121 11 24.08 0.86
122 12 0.00 0.00
123 12 0.25 1.25
124 12 0.50 3.96
125 12 1.00 7.82
126 12 2.00 9.72
127 12 3.52 9.75
128 12 5.07 8.57
129 12 7.07 6.59
130 12 9.03 6.11
131 12 12.05 4.57
132 12 24.15 1.17
# Subject Weight and Dose ----
# Use select() and distinct() to create a data frame containing a single row
# for each subject with their weight and dose.
# One row per subject: select the subject-level columns, then drop the
# duplicate rows.
SubjDose <- theoph |>
  select(Subject, Wt, Dose) |>
  distinct()

# Print the result; the header below belongs to the pasted console output.
SubjDose
#   Subject Wt Dose
1 1 79.6 4.02
2 2 72.4 4.40
3 3 70.5 4.53
4 4 72.7 4.40
5 5 54.6 5.86
6 6 80.0 4.00
7 7 64.6 4.95
8 8 70.5 4.53
9 9 86.4 3.10
10 10 58.2 5.50
11 11 65.0 4.92
12 12 60.5 5.30
# First Subject ----
# Use filter() to create a data frame containing only the first subject.
# Keep only the rows belonging to subject 1.
Subj1 <- theoph |>
  filter(Subject == 1)

# Print the result; the header below belongs to the pasted console output.
Subj1
#   Subject Wt Dose Time conc
1 1 79.6 4.02 0.00 0.74
2 1 79.6 4.02 0.25 2.84
3 1 79.6 4.02 0.57 6.57
4 1 79.6 4.02 1.12 10.50
5 1 79.6 4.02 2.02 9.66
6 1 79.6 4.02 3.82 8.58
7 1 79.6 4.02 5.10 8.36
8 1 79.6 4.02 7.03 7.47
9 1 79.6 4.02 9.05 6.89
10 1 79.6 4.02 12.12 5.94
11 1 79.6 4.02 24.37 3.28
# First Four Subjects ----
# Use filter() to create a data frame containing only the first four subjects.
# I initially tried:
# First attempt, kept for reference only (commented out because its result
# was immediately overwritten below):
#   Subj1to4 <- theoph |>
#     filter(Subject <= 4)
# This did not work because Subject had been converted to a factor: ordering
# comparisons such as `<=` are not meaningful for unordered factors.
# I instead used %in%.
Subj1to4 <- theoph |>
  filter(Subject %in% 1:4)

# Print the result; the header below belongs to the pasted console output.
Subj1to4
#   Subject Wt Dose Time conc
1 1 79.6 4.02 0.00 0.74
2 1 79.6 4.02 0.25 2.84
3 1 79.6 4.02 0.57 6.57
4 1 79.6 4.02 1.12 10.50
5 1 79.6 4.02 2.02 9.66
6 1 79.6 4.02 3.82 8.58
7 1 79.6 4.02 5.10 8.36
8 1 79.6 4.02 7.03 7.47
9 1 79.6 4.02 9.05 6.89
10 1 79.6 4.02 12.12 5.94
11 1 79.6 4.02 24.37 3.28
12 2 72.4 4.40 0.00 0.00
13 2 72.4 4.40 0.27 1.72
14 2 72.4 4.40 0.52 7.91
15 2 72.4 4.40 1.00 8.31
16 2 72.4 4.40 1.92 8.33
17 2 72.4 4.40 3.50 6.85
18 2 72.4 4.40 5.02 6.08
19 2 72.4 4.40 7.03 5.40
20 2 72.4 4.40 9.00 4.55
21 2 72.4 4.40 12.00 3.01
22 2 72.4 4.40 24.30 0.90
23 3 70.5 4.53 0.00 0.00
24 3 70.5 4.53 0.27 4.40
25 3 70.5 4.53 0.58 6.90
26 3 70.5 4.53 1.02 8.20
27 3 70.5 4.53 2.02 7.80
28 3 70.5 4.53 3.62 7.50
29 3 70.5 4.53 5.08 6.20
30 3 70.5 4.53 7.07 5.30
31 3 70.5 4.53 9.00 4.90
32 3 70.5 4.53 12.15 3.70
33 3 70.5 4.53 24.17 1.05
34 4 72.7 4.40 0.00 0.00
35 4 72.7 4.40 0.35 1.89
36 4 72.7 4.40 0.60 4.60
37 4 72.7 4.40 1.07 8.60
38 4 72.7 4.40 2.13 8.38
39 4 72.7 4.40 3.50 7.54
40 4 72.7 4.40 5.02 6.88
41 4 72.7 4.40 7.02 5.78
42 4 72.7 4.40 9.02 5.33
43 4 72.7 4.40 11.98 4.19
44 4 72.7 4.40 24.65 1.15
# Average Concentration by Subject ----
# Use group_by() and summarise() to calculate the average concentration for
# each subject across all sampling times.
# Mean concentration per subject: group the rows by Subject, then collapse
# each group to the mean of its conc values.
avgConc <- summarise(
  group_by(theoph, Subject),
  avgConc = mean(conc)
)
avgConc # A tibble: 12 × 2
Subject avgConc
<fct> <dbl>
1 1 6.44
2 2 4.82
3 3 5.09
4 4 4.94
5 5 5.78
6 6 3.53
7 7 3.91
8 8 4.27
9 9 4.89
10 10 5.93
11 11 4.51
12 12 5.41
# Concentration Through Time for Subject 1 ----
# Use filter() and ggplot() to create a line plot of concentration through
# time for the first subject.
library(ggplot2)

# Line plot of concentration against time, restricted to subject 1.
theoph |>
  filter(Subject == 1) |>
  ggplot(aes(x = Time, y = conc)) +
  geom_line()

# Concentration Through Time by Subject ----
# Use select() and ggplot() to create a line plot with separate colors for
# each subject.
# One line per subject, distinguished by colour.
theoph |>
  select(Subject, Time, conc) |>
  ggplot(aes(x = Time, y = conc, col = Subject)) +
  geom_line()

# Faceted Concentration Through Time Plots (mod 7 facet_wrap) ----
# Use select() and ggplot() to create a faceted line plot for each subject.
# Concentration against time, drawn as one panel per subject with a title and
# axis labels.
theoph |>
  select(Subject, Time, conc) |>
  ggplot(mapping = aes(Time, conc)) +
  geom_line() +
  facet_wrap(~Subject) +
  labs(
    title = "Conc and Time by Subject",
    x = "Time (hr)",
    y = "Concentration (mg/l)"
  )