install.packages(“rmarkdown”)
library(knitr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
file_path <- "/Users/nicoleborunda/Downloads/acupuncture.csv"
acu <- read_csv(file_path)
## Rows: 301 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (9): id, age, sex, migraine, chronicity, acupuncturist, group, pk1, pk5
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
### Means for All Variables
means <- apply(acu, 2, mean)
print(means)
## id age sex migraine chronicity
## 469.0066445 46.3388704 0.8405316 0.9435216 21.5980066
## acupuncturist group pk1 pk5
## 6.0299003 0.5348837 25.5689369 19.0824474
### Standard Deviations for All Variables
sds <- apply(acu, 2, sd)
print(sds)
## id age sex migraine chronicity
## 204.4381894 10.3941386 0.3667220 0.2312276 13.9521036
## acupuncturist group pk1 pk5
## 2.7560424 0.4996123 15.4239810 15.6115822
mean_pk5_by_group <- by(acu$pk5, acu$group, mean)
cat("Treatment Group\tMean pk5 Score\n")
## Treatment Group Mean pk5 Score
for (group in names(mean_pk5_by_group)) {
cat(group, "\t", mean_pk5_by_group[[group]], "\n")
}
## 0 22.34345
## 1 16.24679
I am not comfortable making a conclusion about the efficacy of acupuncture based on these means alone. Pk5 data are headache severity ratings at 1 year. To make inferences about the efficacy about the acupuncture, I would would to employ both baseline and year 1 data for the treatment and control groups. I would also want to know more about the variability within the groups so I’d be interested in the standard deviations. With that information, I could then run a t-test to see if the difference between the means was significant.
acu$diff <- acu$pk5 - acu$pk1
head(acu)
## # A tibble: 6 × 10
## id age sex migraine chronicity acupuncturist group pk1 pk5 diff
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 104 32 1 1 14 12 0 16 15.3 -0.667
## 2 108 56 1 1 40 9 0 16.5 23.2 6.75
## 3 112 45 1 1 27 9 1 9.25 6.25 -3
## 4 113 45 1 1 30 9 1 42.5 51.2 8.75
## 5 114 49 1 1 49 9 1 24.2 25.2 1
## 6 126 47 1 1 42 5 0 21 15.2 -5.75
tail(acu)
## # A tibble: 6 × 10
## id age sex migraine chronicity acupuncturist group pk1 pk5 diff
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 880 36 1 1 24 8 0 17 9.75 -7.25
## 2 884 65 1 1 22 8 1 17.5 5.75 -11.8
## 3 886 33 1 1 21 4 0 21 24.2 3.25
## 4 899 49 1 1 6 8 1 29.8 16.2 -13.5
## 5 905 50 1 1 7 8 0 14.2 4 -10.2
## 6 912 43 1 1 4 8 1 11.8 9.5 -2.25
acu$remission <- ifelse(acu$pk5 - acu$pk1 >= 10, 1, 0)
head(acu)
## # A tibble: 6 × 11
## id age sex migraine chronicity acupuncturist group pk1 pk5 diff
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 104 32 1 1 14 12 0 16 15.3 -0.667
## 2 108 56 1 1 40 9 0 16.5 23.2 6.75
## 3 112 45 1 1 27 9 1 9.25 6.25 -3
## 4 113 45 1 1 30 9 1 42.5 51.2 8.75
## 5 114 49 1 1 49 9 1 24.2 25.2 1
## 6 126 47 1 1 42 5 0 21 15.2 -5.75
## # ℹ 1 more variable: remission <dbl>
tail(acu)
## # A tibble: 6 × 11
## id age sex migraine chronicity acupuncturist group pk1 pk5 diff
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 880 36 1 1 24 8 0 17 9.75 -7.25
## 2 884 65 1 1 22 8 1 17.5 5.75 -11.8
## 3 886 33 1 1 21 4 0 21 24.2 3.25
## 4 899 49 1 1 6 8 1 29.8 16.2 -13.5
## 5 905 50 1 1 7 8 0 14.2 4 -10.2
## 6 912 43 1 1 4 8 1 11.8 9.5 -2.25
## # ℹ 1 more variable: remission <dbl>
library(dplyr)
mean_pk5_by_group <- acu %>%
group_by(group) %>%
summarize(mean_pk5 = mean(pk5))
print(mean_pk5_by_group)
## # A tibble: 2 × 2
## group mean_pk5
## <dbl> <dbl>
## 1 0 22.3
## 2 1 16.2
acu <- acu %>%
mutate(diff2 = pk5 - pk1)
print(acu)
## # A tibble: 301 × 12
## id age sex migraine chronicity acupuncturist group pk1 pk5 diff
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 104 32 1 1 14 12 0 16 15.3 -0.667
## 2 108 56 1 1 40 9 0 16.5 23.2 6.75
## 3 112 45 1 1 27 9 1 9.25 6.25 -3
## 4 113 45 1 1 30 9 1 42.5 51.2 8.75
## 5 114 49 1 1 49 9 1 24.2 25.2 1
## 6 126 47 1 1 42 5 0 21 15.2 -5.75
## 7 130 46 1 1 3 7 1 21.8 1 -20.8
## 8 131 64 1 1 23 7 1 14.5 2.5 -12
## 9 135 59 1 1 40 7 0 40.5 28.8 -11.8
## 10 137 53 1 1 32 7 1 11.8 13.5 1.75
## # ℹ 291 more rows
## # ℹ 2 more variables: remission <dbl>, diff2 <dbl>
acu <- acu %>%
mutate(remission2 = ifelse(pk5 - pk1 >= 10, 1, 0))
print(acu)
## # A tibble: 301 × 13
## id age sex migraine chronicity acupuncturist group pk1 pk5 diff
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 104 32 1 1 14 12 0 16 15.3 -0.667
## 2 108 56 1 1 40 9 0 16.5 23.2 6.75
## 3 112 45 1 1 27 9 1 9.25 6.25 -3
## 4 113 45 1 1 30 9 1 42.5 51.2 8.75
## 5 114 49 1 1 49 9 1 24.2 25.2 1
## 6 126 47 1 1 42 5 0 21 15.2 -5.75
## 7 130 46 1 1 3 7 1 21.8 1 -20.8
## 8 131 64 1 1 23 7 1 14.5 2.5 -12
## 9 135 59 1 1 40 7 0 40.5 28.8 -11.8
## 10 137 53 1 1 32 7 1 11.8 13.5 1.75
## # ℹ 291 more rows
## # ℹ 3 more variables: remission <dbl>, diff2 <dbl>, remission2 <dbl>
plot(acu$pk1, acu$pk5, xlab = "Baseline Headache Severity", ylab = "1-year Headache Severity",
main = "Baseline vs 1-year Headache Severity")
ggplot(data = acu, aes(x = pk1, y = pk5)) +
geom_point() +
labs(x = "Baseline Headache Severity", y = "1-year Headache Severity",
title = "Baseline vs 1-year Headache Severity")
ggplot(acu, aes(x = pk1, y = pk5, color = factor(group), shape = factor(sex))) +
geom_point() +
labs(x = "Baseline Headache Severity Rating", y = "1-Year Headache Severity Rating",
color = "Treatment Group", shape = "Sex", title = "Baseline vs. 1-year Headache Severity") +
scale_color_manual(values = c("purple", "orange"), labels = c("Control", "Treatment")) +
scale_shape_manual(values = c(1, 16), labels = c("Male", "Female")) +
theme_bw()
# Set graphical elements treatment group and sex
colors <- c("purple", "orange")
shapes <- c(1, 16)
# Create plot
plot(acu$pk1, acu$pk5,
xlab = "Baseline Headache Severity Rating",
ylab = "1-Year Headache Severity Rating",
col = colors[acu$group + 1],
pch = shapes[acu$sex + 1],
main = "Baseline vs 1-Year Headache Severity Rating",
xlim = c(min(acu$pk1), max(acu$pk1)),
ylim = c(min(acu$pk5), max(acu$pk5)))
# Add treatment group legend
legend("topright",
legend = c("Control", "Treatment"),
col = colors,
pch = shapes[1],
title = "Treatment Group",
cex = 0.8,
bg = "white")
# Add sex legend
legend("bottomright",
legend = c("Male", "Female"),
col = "black",
pch = shapes,
title = "Sex",
cex = 0.8,
bg = "white")
# Add axis labels
mtext("Sex", side = 4, line = 3)
# Adjust plot margins
par(mar = c(5, 4, 4, 6) + 0.1)