library(tidyverse)
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(tidyr)
install.packages("statsr", repos = "http://cran.us.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5m/4f5rvwrn5rngf6j4gpl2mc9w0000gn/T//Rtmp4MKS14/downloaded_packages
library("statsr")
## Loading required package: BayesFactor
## Warning: package 'BayesFactor' was built under R version 4.3.2
## Loading required package: coda
## Warning: package 'coda' was built under R version 4.3.2
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## ************
## Welcome to BayesFactor 0.9.12-4.7. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
##
## Type BFManual() to open the manual.
## ************
install.packages("nlme", repos = "http://cran.us.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5m/4f5rvwrn5rngf6j4gpl2mc9w0000gn/T//Rtmp4MKS14/downloaded_packages
library(nlme)
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:dplyr':
##
## collapse
install.packages("emmeans", repos = "http://cran.us.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5m/4f5rvwrn5rngf6j4gpl2mc9w0000gn/T//Rtmp4MKS14/downloaded_packages
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.3.2
install.packages("GGally", repos = "http://cran.us.r-project.org")
##
## The downloaded binary packages are in
## /var/folders/5m/4f5rvwrn5rngf6j4gpl2mc9w0000gn/T//Rtmp4MKS14/downloaded_packages
library(GGally)
## Warning: package 'GGally' was built under R version 4.3.2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'GGally'
##
## The following object is masked from 'package:emmeans':
##
## pigs
# Reading the url file and creating a data frame
url= "https://raw.githubusercontent.com/ursulapodosenin/Projects/main/Bells%20Palsy%20Clinical%20Trial.csv"
BellsPalsyData<-as.data.frame(read.csv(url))
head(BellsPalsyData)
## Patient.ID Sex Age Baseline.Score.on.House.Brackmann.scale
## 1 1 Female 77 6
## 2 2 Female 61 6
## 3 3 Female 46 4
## 4 4 Female 46 3
## 5 5 Female 42 3
## 6 6 Female 50 4
## Time.between.onset.of.symptoms.and.start.of.treatment Treatment.Group
## 1 Within 24 hr Prednisolone–Placebo
## 2 Within 24 hr Prednisolone–Placebo
## 3 >24 to ≤48 hr Prednisolone–Placebo
## 4 Within 24 hr Prednisolone–Placebo
## 5 >24 to ≤48 hr Prednisolone–Placebo
## 6 >48 to ≤72 hr Prednisolone–Placebo
## Received.Prednisolone Received.Acyclovir
## 1 Yes No
## 2 Yes No
## 3 Yes No
## 4 Yes No
## 5 Yes No
## 6 Yes No
## X3.Month.Score.on.House.Brackmann.scale Full.Recovery.in.3.Months
## 1 2 No
## 2 1 Yes
## 3 1 Yes
## 4 1 Yes
## 5 1 Yes
## 6 1 Yes
## X9.Month.Score.on.House.Brackmann.scale Full.Recovery.in.9.Months
## 1 2 No
## 2 1 Yes
## 3 1 Yes
## 4 1 Yes
## 5 1 Yes
## 6 1 Yes
# Creating a function that provides the value of the mode
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
# Calculating the total number of males and females in the study
total_females<-sum(BellsPalsyData=="Female")
total_males<-sum(BellsPalsyData=="Male")
total_females
## [1] 238
total_males
## [1] 256
# Calculating the total number of participants in the control group and comparing it to the total number of participants
control_total<-sum(BellsPalsyData$Treatment.Group== "Placebo–Placebo")
control_total
## [1] 122
noncontrol_total<-sum(BellsPalsyData$Treatment.Group!= "Placebo–Placebo")
noncontrol_total
## [1] 372
control_ratio<-(control_total/noncontrol_total)
control_ratio
## [1] 0.327957
# Obtaining summary statistics regarding age, grouped by gender
summ_stats_by_sex<-
BellsPalsyData|>
group_by(Sex)|>
summarise(mean_age= mean(Age),
median_age= median(Age),
mode_age= Mode(Age),
min_age= min(Age),
max_age= max(Age),
sd_age= sd(Age) )
summ_stats_by_sex
## # A tibble: 2 × 7
## Sex mean_age median_age mode_age min_age max_age sd_age
## <chr> <dbl> <dbl> <int> <int> <int> <dbl>
## 1 Female 45.0 44 34 16 90 14.4
## 2 Male 44.8 44 36 16 89 14.7
# Creating an additional age group column that groups all participants into an age category
BellsPalsyData['agegroup'] = case_when(BellsPalsyData$Age >= 0 & BellsPalsyData$Age <= 24 ~ 1,
BellsPalsyData$Age >= 25 & BellsPalsyData$Age <= 49 ~ 2,
BellsPalsyData$Age >= 50 & BellsPalsyData$Age <= 74 ~ 3,
BellsPalsyData$Age >= 75 & BellsPalsyData$Age <= 100 ~ 4)
# Creating a graph that looks at the distribution of age groups in the study
ggplot(data=BellsPalsyData, aes(x=agegroup))+
geom_bar(binwidth = 5, color="black", fill="blue")+
theme_classic()+
labs(title = "Frequency of Age Groups", x = "Age Group", y = "Frequency")
## Warning in geom_bar(binwidth = 5, color = "black", fill = "blue"): Ignoring
## unknown parameters: `binwidth`

# The House–Brackmann score is a score to grade the degree of nerve damage in a facial nerve palsy
# Looking at the most common time frame between the onset of symptoms and start of treatment
Mode_BP_Data<- Mode(BellsPalsyData$Time.between.onset.of.symptoms.and.start.of.treatment)
Mode_BP_Data
## [1] "Within 24 hr"
# Creating an additional column that looks the difference in recovery from baseline compared to 3 months of treatment
BellsPalsyData['difference_3']= BellsPalsyData$X3.Month.Score.on.House.Brackmann.scale
-BellsPalsyData$Baseline.Score.on.House.Brackmann.scale
## [1] -6 -6 -4 -3 -3 -4 -4 -4 -4 -4 -4 -2 -3 -4 -4 -4 -2 -3 -4 -2 -3 -4 -2 -4 -4
## [26] -3 -4 -5 -2 -3 -4 -3 -3 -2 -5 -3 -2 -6 -4 -2 -4 -5 -6 -4 -3 -3 -4 -3 -2 -6
## [51] -5 -3 -3 -2 -3 -4 -3 -2 -4 -3 -5 -3 -3 -5 -5 -2 -3 -2 -4 -4 -3 -2 -3 -6 -4
## [76] -4 -6 -2 -3 -5 -4 -3 -3 -4 -3 -3 -2 -3 -4 -4 -3 -3 -5 -5 -4 -4 -2 -4 -2 -2
## [101] -2 -4 -5 -2 -5 -5 -4 -3 -2 -2 -4 -5 -3 -4 -5 -3 -5 -2 -3 -4 -4 -5 -4 -3 -3
## [126] -3 -3 -3 -3 -3 -3 -4 -3 -4 -4 -3 -4 -4 -4 -3 -4 -3 -6 -5 -6 -3 -4 -5 -4 -3
## [151] -5 -4 -2 -3 -5 -5 -2 -4 -5 -4 -4 -2 -2 -5 -4 -2 -3 -3 -5 -4 -6 -4 -5 -3 -4
## [176] -5 -4 -5 -5 -3 -5 -2 -3 -4 -2 -3 -3 -3 -6 -5 -5 -4 -4 -5 -5 -5 -2 -2 -3 -4
## [201] -2 -4 -3 -6 -4 -3 -4 -4 -3 -2 -2 -2 -4 -4 -2 -2 -3 -3 -4 -2 -4 -4 -2 -2 -4
## [226] -3 -4 -3 -4 -3 -5 -4 -3 -2 -5 -2 -2 -3 -2 -4 -5 -4 -3 -2 -6 -3 -5 -3 -5 -2
## [251] -2 -5 -3 -2 -3 -4 -4 -5 -4 -4 -4 -4 -4 -4 -5 -4 -4 -2 -4 -4 -2 -2 -5 -3 -4
## [276] -4 -5 -5 -3 -3 -3 -3 -4 -4 -4 -5 -4 -2 -3 -4 -5 -2 -2 -4 -3 -5 -2 -3 -4 -4
## [301] -3 -6 -4 -4 -3 -4 -3 -2 -4 -2 -5 -2 -6 -4 -6 -5 -2 -4 -2 -4 -2 -4 -5 -3 -4
## [326] -4 -4 -4 -4 -4 -6 -4 -4 -3 -3 -5 -4 -4 -3 -3 -5 -4 -3 -5 -5 -6 -3 -4 -3 -3
## [351] -3 -6 -4 -5 -3 -6 -5 -4 -4 -5 -4 -3 -3 -3 -5 -3 -2 -6 -2 -4 -4 -6 -3 -3 -4
## [376] -4 -3 -2 -3 -2 -3 -3 -3 -2 -4 -2 -2 -3 -4 -2 -2 -2 -2 -4 -3 -4 -3 -3 -6 -2
## [401] -3 -2 -4 -6 -5 -5 -4 -6 -3 -4 -4 -5 -4 -3 -4 -4 -4 -5 -6 -4 -6 -4 -4 -5 -4
## [426] -4 -5 -4 -4 -4 -3 -2 -5 -5 -4 -3 -6 -4 -3 -3 -4 -3 -3 -4 -4 -3 -4 -3 -3 -2
## [451] -5 -2 -6 -4 -4 -3 -5 -3 -4 -2 -3 -5 -3 -5 -3 -6 -4 -3 -5 -6 -2 -5 -2 -6 -4
## [476] -5 -3 -5 -4 -2 -2 -4 -3 -6 -5 -3 -4 -3 -4 -5 -3 -3 -4 -5
# Creating a graphical representation of the difference between the ending score on the House Brackmann scale and the starting score
ggplot(data=BellsPalsyData, aes(x=difference_3))+
geom_bar(binwidth = 10, color="black", fill="green")+
theme_classic()+
labs(title = "House Brackmann Score 3 Months After Initial Score", x = "House Brackmann Score", y = "Frequency")
## Warning in geom_bar(binwidth = 10, color = "black", fill = "green"): Ignoring
## unknown parameters: `binwidth`

# Creating an additional column that looks the difference in recovery from baseline compared to 9 months of treatment
BellsPalsyData['difference_3']= BellsPalsyData$X9.Month.Score.on.House.Brackmann.scale
-BellsPalsyData$Baseline.Score.on.House.Brackmann.scale
## [1] -6 -6 -4 -3 -3 -4 -4 -4 -4 -4 -4 -2 -3 -4 -4 -4 -2 -3 -4 -2 -3 -4 -2 -4 -4
## [26] -3 -4 -5 -2 -3 -4 -3 -3 -2 -5 -3 -2 -6 -4 -2 -4 -5 -6 -4 -3 -3 -4 -3 -2 -6
## [51] -5 -3 -3 -2 -3 -4 -3 -2 -4 -3 -5 -3 -3 -5 -5 -2 -3 -2 -4 -4 -3 -2 -3 -6 -4
## [76] -4 -6 -2 -3 -5 -4 -3 -3 -4 -3 -3 -2 -3 -4 -4 -3 -3 -5 -5 -4 -4 -2 -4 -2 -2
## [101] -2 -4 -5 -2 -5 -5 -4 -3 -2 -2 -4 -5 -3 -4 -5 -3 -5 -2 -3 -4 -4 -5 -4 -3 -3
## [126] -3 -3 -3 -3 -3 -3 -4 -3 -4 -4 -3 -4 -4 -4 -3 -4 -3 -6 -5 -6 -3 -4 -5 -4 -3
## [151] -5 -4 -2 -3 -5 -5 -2 -4 -5 -4 -4 -2 -2 -5 -4 -2 -3 -3 -5 -4 -6 -4 -5 -3 -4
## [176] -5 -4 -5 -5 -3 -5 -2 -3 -4 -2 -3 -3 -3 -6 -5 -5 -4 -4 -5 -5 -5 -2 -2 -3 -4
## [201] -2 -4 -3 -6 -4 -3 -4 -4 -3 -2 -2 -2 -4 -4 -2 -2 -3 -3 -4 -2 -4 -4 -2 -2 -4
## [226] -3 -4 -3 -4 -3 -5 -4 -3 -2 -5 -2 -2 -3 -2 -4 -5 -4 -3 -2 -6 -3 -5 -3 -5 -2
## [251] -2 -5 -3 -2 -3 -4 -4 -5 -4 -4 -4 -4 -4 -4 -5 -4 -4 -2 -4 -4 -2 -2 -5 -3 -4
## [276] -4 -5 -5 -3 -3 -3 -3 -4 -4 -4 -5 -4 -2 -3 -4 -5 -2 -2 -4 -3 -5 -2 -3 -4 -4
## [301] -3 -6 -4 -4 -3 -4 -3 -2 -4 -2 -5 -2 -6 -4 -6 -5 -2 -4 -2 -4 -2 -4 -5 -3 -4
## [326] -4 -4 -4 -4 -4 -6 -4 -4 -3 -3 -5 -4 -4 -3 -3 -5 -4 -3 -5 -5 -6 -3 -4 -3 -3
## [351] -3 -6 -4 -5 -3 -6 -5 -4 -4 -5 -4 -3 -3 -3 -5 -3 -2 -6 -2 -4 -4 -6 -3 -3 -4
## [376] -4 -3 -2 -3 -2 -3 -3 -3 -2 -4 -2 -2 -3 -4 -2 -2 -2 -2 -4 -3 -4 -3 -3 -6 -2
## [401] -3 -2 -4 -6 -5 -5 -4 -6 -3 -4 -4 -5 -4 -3 -4 -4 -4 -5 -6 -4 -6 -4 -4 -5 -4
## [426] -4 -5 -4 -4 -4 -3 -2 -5 -5 -4 -3 -6 -4 -3 -3 -4 -3 -3 -4 -4 -3 -4 -3 -3 -2
## [451] -5 -2 -6 -4 -4 -3 -5 -3 -4 -2 -3 -5 -3 -5 -3 -6 -4 -3 -5 -6 -2 -5 -2 -6 -4
## [476] -5 -3 -5 -4 -2 -2 -4 -3 -6 -5 -3 -4 -3 -4 -5 -3 -3 -4 -5
# Creating a graphical representation of the difference between the ending score on the House Brackmann scale and the starting score
ggplot(data=BellsPalsyData, aes(x=difference_3))+
geom_bar(binwidth = 10, color="black", fill="blue")+
theme_classic()+
labs(title = "House Brackmann Score 9 Months After Initial Score", x = "House Brackmann Score", y = "Frequency")
## Warning in geom_bar(binwidth = 10, color = "black", fill = "blue"): Ignoring
## unknown parameters: `binwidth`

# Creating a new data frame for side-by-side comparison
comparison_data <- data.frame(
ScoreType = rep(c("3 Month", "Baseline"), each = nrow(BellsPalsyData)),
Score = c(BellsPalsyData$X3.Month.Score.on.House.Brackmann.scale,
BellsPalsyData$Baseline.Score.on.House.Brackmann.scale)
)
# Plotting side-by-side bar graph
ggplot(comparison_data, aes(x = ScoreType, y = Score, fill = ScoreType)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Comparison of Scores After 3 Months",
x = "Time Point",
y = "Score on House-Brackmann Scale") +
scale_fill_manual(values = c("purple", "red")) +
theme_minimal()

# Creating a new data frame for side-by-side comparison
comparison_data2 <- data.frame(
ScoreType = rep(c("9 Month", "Baseline"), each = nrow(BellsPalsyData)),
Score = c(BellsPalsyData$X9.Month.Score.on.House.Brackmann.scale,
BellsPalsyData$Baseline.Score.on.House.Brackmann.scale)
)
# Plotting side-by-side bar graph
ggplot(comparison_data2, aes(x = ScoreType, y = Score, fill = ScoreType)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Comparison of Scores After 9 Months",
x = "Time Point",
y = "Score on House-Brackmann Scale") +
scale_fill_manual(values = c("blue", "green")) +
theme_minimal()

# Getting summary statistics by treatment group
initial_summ_stats_by_treatment_group<-as.data.frame(
BellsPalsyData|>
group_by(Treatment.Group)|>
summarise(mean_bs= mean(Baseline.Score.on.House.Brackmann.scale),
median_bs= median(Baseline.Score.on.House.Brackmann.scale),
mode_bs= Mode(Baseline.Score.on.House.Brackmann.scale),
min_bs= min(Baseline.Score.on.House.Brackmann.scale),
max_bs= max(Baseline.Score.on.House.Brackmann.scale),
sd_bs= sd(Baseline.Score.on.House.Brackmann.scale) ))
initial_summ_stats_by_treatment_group
## Treatment.Group mean_bs median_bs mode_bs min_bs max_bs sd_bs
## 1 Acyclovir–Placebo 3.803279 4 4 2 6 1.103258
## 2 Acyclovir–Prednisolone 3.606299 4 3 2 6 1.127981
## 3 Placebo–Placebo 3.737705 4 4 2 6 1.169893
## 4 Prednisolone–Placebo 3.577236 4 4 2 6 1.123578
nine_summ_stats_by_treatment_group<-as.data.frame(
BellsPalsyData|>
group_by(Treatment.Group)|>
summarise(mean_9s= mean(X9.Month.Score.on.House.Brackmann.scale),
median_9s= median(X9.Month.Score.on.House.Brackmann.scale),
mode_9s= Mode(X9.Month.Score.on.House.Brackmann.scale),
min_9s= min(X9.Month.Score.on.House.Brackmann.scale),
max_9s= max(X9.Month.Score.on.House.Brackmann.scale),
sd_9s= sd(X9.Month.Score.on.House.Brackmann.scale) ))
nine_summ_stats_by_treatment_group
## Treatment.Group mean_9s median_9s mode_9s min_9s max_9s sd_9s
## 1 Acyclovir–Placebo 1.213115 1 1 1 4 0.5637758
## 2 Acyclovir–Prednisolone 1.086614 1 1 1 3 0.3338953
## 3 Placebo–Placebo 1.213115 1 1 1 4 0.5782491
## 4 Prednisolone–Placebo 1.065041 1 1 1 3 0.2787512
# Looking at the difference in means across treatment groups from the initial score to the 9 month score
nine_summ_stats_by_treatment_group$mean_9s-initial_summ_stats_by_treatment_group$mean_bs
## [1] -2.590164 -2.519685 -2.524590 -2.512195
# Extracting the placebo group and putting it into a data frame
which(BellsPalsyData$Treatment.Group== "Placebo–Placebo")
## [1] 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
## [19] 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
## [37] 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
## [55] 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
## [73] 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462
## [91] 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
## [109] 481 482 483 484 485 486 487 488 489 490 491 492 493 494
placebo_group<-BellsPalsyData[BellsPalsyData$Treatment.Group== "Placebo–Placebo", ]
glimpse(placebo_group)
## Rows: 122
## Columns: 14
## $ Patient.ID <int> 373, 374, 375, 3…
## $ Sex <chr> "Male", "Female"…
## $ Age <int> 59, 43, 65, 48, …
## $ Baseline.Score.on.House.Brackmann.scale <int> 3, 3, 4, 4, 3, 2…
## $ Time.between.onset.of.symptoms.and.start.of.treatment <chr> ">24 to ≤48 hr",…
## $ Treatment.Group <chr> "Placebo–Placebo…
## $ Received.Prednisolone <chr> "No", "No", "No"…
## $ Received.Acyclovir <chr> "No", "No", "No"…
## $ X3.Month.Score.on.House.Brackmann.scale <int> 1, 1, 3, 2, 2, 2…
## $ Full.Recovery.in.3.Months <chr> "Yes", "Yes", "N…
## $ X9.Month.Score.on.House.Brackmann.scale <int> 1, 1, 3, 1, 2, 2…
## $ Full.Recovery.in.9.Months <chr> "Yes", "Yes", "N…
## $ agegroup <dbl> 3, 2, 3, 2, 2, 3…
## $ difference_3 <int> 1, 1, 3, 1, 2, 2…
# Seeing if there is a correlation between baseline scores and 3 month, and 9 month scores by treatment group and age group
BellsPalsyData|>
group_by(Treatment.Group, agegroup)|>
summarise(cor(Baseline.Score.on.House.Brackmann.scale, X3.Month.Score.on.House.Brackmann.scale),
cor(Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale),)
## Warning: There were 5 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `cor(Baseline.Score.on.House.Brackmann.scale,
## X3.Month.Score.on.House.Brackmann.scale)`.
## ℹ In group 5: `Treatment.Group = "Acyclovir–Prednisolone"` and `agegroup = 1`.
## Caused by warning in `cor()`:
## ! the standard deviation is zero
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.
## `summarise()` has grouped output by 'Treatment.Group'. You can override using
## the `.groups` argument.
## # A tibble: 16 × 4
## # Groups: Treatment.Group [4]
## Treatment.Group agegroup cor(Baseline.Score.o…¹ cor(Baseline.Score.o…²
## <chr> <dbl> <dbl> <dbl>
## 1 Acyclovir–Placebo 1 0.415 0.248
## 2 Acyclovir–Placebo 2 0.109 0.0528
## 3 Acyclovir–Placebo 3 0.364 0.434
## 4 Acyclovir–Placebo 4 0.816 0.707
## 5 Acyclovir–Prednisolone 1 NA NA
## 6 Acyclovir–Prednisolone 2 0.0103 0.00649
## 7 Acyclovir–Prednisolone 3 0.258 0.183
## 8 Acyclovir–Prednisolone 4 NA NA
## 9 Placebo–Placebo 1 0.304 NA
## 10 Placebo–Placebo 2 0.219 0.181
## 11 Placebo–Placebo 3 0.167 -0.0688
## 12 Placebo–Placebo 4 0.732 0.642
## 13 Prednisolone–Placebo 1 NA NA
## 14 Prednisolone–Placebo 2 0.0364 0.0294
## 15 Prednisolone–Placebo 3 -0.0836 -0.0845
## 16 Prednisolone–Placebo 4 0.189 0.945
## # ℹ abbreviated names:
## # ¹​`cor(Baseline.Score.on.House.Brackmann.scale, X3.Month.Score.on.House.Brackmann.scale)`,
## # ²​`cor(Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale)`
# Seeing if there is a correlation between baseline scores and 3 month, and 9 month scores by treatment group and sex
BellsPalsyData|>
group_by(Treatment.Group, Sex)|>
summarise(cor(Baseline.Score.on.House.Brackmann.scale, X3.Month.Score.on.House.Brackmann.scale),
cor(Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale),)
## `summarise()` has grouped output by 'Treatment.Group'. You can override using
## the `.groups` argument.
## # A tibble: 8 × 4
## # Groups: Treatment.Group [4]
## Treatment.Group Sex cor(Baseline.Score.on.H…¹ cor(Baseline.Score.o…²
## <chr> <chr> <dbl> <dbl>
## 1 Acyclovir–Placebo Female 0.278 0.332
## 2 Acyclovir–Placebo Male 0.372 0.348
## 3 Acyclovir–Prednisolone Female 0.117 0.0365
## 4 Acyclovir–Prednisolone Male 0.239 0.153
## 5 Placebo–Placebo Female 0.283 0.140
## 6 Placebo–Placebo Male 0.272 0.209
## 7 Prednisolone–Placebo Female 0.0983 0.320
## 8 Prednisolone–Placebo Male -0.0369 -0.0958
## # ℹ abbreviated names:
## # ¹​`cor(Baseline.Score.on.House.Brackmann.scale, X3.Month.Score.on.House.Brackmann.scale)`,
## # ²​`cor(Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale)`
# Visualizing correlation between baseline and 9-month House Brackmann scores
ggplot(data = BellsPalsyData, aes(x = Baseline.Score.on.House.Brackmann.scale, y = X9.Month.Score.on.House.Brackmann.scale)) +
geom_jitter() +
theme_classic()

# Only looking at placebo as that is my target population
placebo_group<-BellsPalsyData[BellsPalsyData$Treatment.Group== "Placebo–Placebo", ]
# Grouping by the variable "Sex" and fitting linear models
model1 <- placebo_group |>
group_by(Sex) |>
do(model_summary = summary(lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .)))
# Printing summaries
print(model1$model_summary)
## [[1]]
##
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36825 -0.23306 -0.16547 -0.09788 2.69934
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.96269 0.23712 4.060 0.000138
## Baseline.Score.on.House.Brackmann.scale 0.06759 0.06024 1.122 0.266129
##
## (Intercept) ***
## Baseline.Score.on.House.Brackmann.scale
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5981 on 63 degrees of freedom
## Multiple R-squared: 0.01959, Adjusted R-squared: 0.004028
## F-statistic: 1.259 on 1 and 63 DF, p-value: 0.2661
##
##
## [[2]]
##
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45212 -0.23862 -0.13187 -0.02512 2.54788
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.81162 0.26192 3.099 0.00306 **
## Baseline.Score.on.House.Brackmann.scale 0.10675 0.06731 1.586 0.11850
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5512 on 55 degrees of freedom
## Multiple R-squared: 0.04373, Adjusted R-squared: 0.02634
## F-statistic: 2.515 on 1 and 55 DF, p-value: 0.1185
# Fitting linear models for each group
model1 <- placebo_group |>
group_by(Sex) |>
do(model = lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .))
# Extracting coefficients for each model
coefficients_df <- model1 |>
summarise(intercept = coef(model)[1], slope = coef(model)[2])
# Fit linear models for each group
model1 <- placebo_group |>
group_by(Sex) |>
do(model = lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .))
# Creating a jitter plot with line of best fit
ggplot(placebo_group, aes(x = Baseline.Score.on.House.Brackmann.scale, y = X9.Month.Score.on.House.Brackmann.scale, color = Sex)) +
geom_jitter(position = position_jitter(width = 0.1), alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE) +
labs(x = "Baseline Score on House-Brackmann scale", y = "9 Month Score on House-Brackmann scale", color = "Sex") +
theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

# Extracting residuals for each group
residuals_df <- model1 |>
summarise(residuals = resid(model))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Creatig a QQ plot of residuals
qqnorm(residuals_df$residuals)
qqline(residuals_df$residuals)

# Creating a histogram of the residuals
hist(residuals_df$residuals, breaks = 10, main = "Histogram of Residuals", xlab = "Residuals")

# Grouping by the variable Sex to fit the linear models
model2 <- placebo_group |>
group_by(agegroup) |>
do(model_summary = summary(lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .)))
## Warning in summary.lm(lm(X9.Month.Score.on.House.Brackmann.scale ~
## Baseline.Score.on.House.Brackmann.scale, : essentially perfect fit: summary may
## be unreliable
# Printing summaries
print(model2$model_summary)
## [[1]]
##
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## 0 0 0 0 0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1 0 Inf <2e-16
## Baseline.Score.on.House.Brackmann.scale 0 0 NaN NaN
##
## (Intercept) ***
## Baseline.Score.on.House.Brackmann.scale
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0 on 8 degrees of freedom
## Multiple R-squared: NaN, Adjusted R-squared: NaN
## F-statistic: NaN on 1 and 8 DF, p-value: NA
##
##
## [[2]]
##
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.31920 -0.16636 -0.08995 -0.08995 2.75722
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.86069 0.20229 4.255 6.86e-05
## Baseline.Score.on.House.Brackmann.scale 0.07642 0.05142 1.486 0.142
##
## (Intercept) ***
## Baseline.Score.on.House.Brackmann.scale
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4646 on 65 degrees of freedom
## Multiple R-squared: 0.03286, Adjusted R-squared: 0.01798
## F-statistic: 2.209 on 1 and 65 DF, p-value: 0.1421
##
##
## [[3]]
##
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3680 -0.3291 -0.2903 -0.2126 1.7097
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.44563 0.35781 4.040 0.000251
## Baseline.Score.on.House.Brackmann.scale -0.03883 0.09133 -0.425 0.673065
##
## (Intercept) ***
## Baseline.Score.on.House.Brackmann.scale
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6554 on 38 degrees of freedom
## Multiple R-squared: 0.004736, Adjusted R-squared: -0.02146
## F-statistic: 0.1808 on 1 and 38 DF, p-value: 0.6731
##
##
## [[4]]
##
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale,
## data = .)
##
## Residuals:
## 1 2 3 4 5
## 0.5 -1.0 -0.5 -0.5 1.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.500 1.669 -0.300 0.784
## Baseline.Score.on.House.Brackmann.scale 0.500 0.345 1.449 0.243
##
## Residual standard error: 1.155 on 3 degrees of freedom
## Multiple R-squared: 0.4118, Adjusted R-squared: 0.2157
## F-statistic: 2.1 on 1 and 3 DF, p-value: 0.2432
# Fitting the linear models for each group
model2 <- placebo_group |>
group_by(agegroup) |>
do(model = lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .))
# Creating a Jitter plot with line of best fit
ggplot(placebo_group, aes(x = Baseline.Score.on.House.Brackmann.scale, y = X9.Month.Score.on.House.Brackmann.scale, color = agegroup)) +
geom_jitter(position = position_jitter(width = 0.1), alpha = 0.5) +
geom_smooth(method = "lm", aes(group = agegroup), se = FALSE, data = placebo_group) +
labs(x = "Baseline Score on House-Brackmann scale", y = "X9 Month Score on House-Brackmann scale", color = "Age Group") +
theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

# Fitting the linear models for each group
model2 <- placebo_group |>
group_by(agegroup) |>
do(model = lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .))
# Extracting residuals for each group
residuals_df <- model2 |>
summarise(residuals = resid(model))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Creating a QQ plot of residuals
qqnorm(residuals_df$residuals)
qqline(residuals_df$residuals)

# Creating a histogram of residuals
hist(residuals_df$residuals, breaks = seq(min(residuals_df$residuals), max(residuals_df$residuals) + 0.5, by = 0.5),
main = "Histogram of Residuals", xlab = "Residuals")

# Checking for outliers in baseline and 9-month scores
outlier_detection <- BellsPalsyData %>%
select(Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale) %>%
gather(key = "time_point", value = "score", Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale) %>%
ggplot(aes(x = score)) +
geom_boxplot() +
facet_wrap(~time_point, scales = "free") +
labs(x = "Score", y = "Frequency", title = "Outlier Detection") +
theme_classic()
outlier_detection
