library(tidyverse)
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(tidyr)
install.packages("statsr", repos = "http://cran.us.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5m/4f5rvwrn5rngf6j4gpl2mc9w0000gn/T//RtmpAJ2d4R/downloaded_packages
library("statsr")
## Loading required package: BayesFactor
## Warning: package 'BayesFactor' was built under R version 4.3.2
## Loading required package: coda
## Warning: package 'coda' was built under R version 4.3.2
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## ************
## Welcome to BayesFactor 0.9.12-4.7. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
## 
## Type BFManual() to open the manual.
## ************
install.packages("nlme", repos = "http://cran.us.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5m/4f5rvwrn5rngf6j4gpl2mc9w0000gn/T//RtmpAJ2d4R/downloaded_packages
library(nlme)
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:dplyr':
## 
##     collapse
install.packages("emmeans", repos = "http://cran.us.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5m/4f5rvwrn5rngf6j4gpl2mc9w0000gn/T//RtmpAJ2d4R/downloaded_packages
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.3.2
# Reading the url file and creating a data frame
url= "https://raw.githubusercontent.com/ursulapodosenin/Projects/main/Bells%20Palsy%20Clinical%20Trial.csv"
BellsPalsyData<-as.data.frame(read.csv(url))
head(BellsPalsyData)
##   Patient.ID    Sex Age Baseline.Score.on.House.Brackmann.scale
## 1          1 Female  77                                       6
## 2          2 Female  61                                       6
## 3          3 Female  46                                       4
## 4          4 Female  46                                       3
## 5          5 Female  42                                       3
## 6          6 Female  50                                       4
##   Time.between.onset.of.symptoms.and.start.of.treatment      Treatment.Group
## 1                                          Within 24 hr Prednisolone–Placebo
## 2                                          Within 24 hr Prednisolone–Placebo
## 3                                         >24 to ≤48 hr Prednisolone–Placebo
## 4                                          Within 24 hr Prednisolone–Placebo
## 5                                         >24 to ≤48 hr Prednisolone–Placebo
## 6                                         >48 to ≤72 hr Prednisolone–Placebo
##   Received.Prednisolone Received.Acyclovir
## 1                   Yes                 No
## 2                   Yes                 No
## 3                   Yes                 No
## 4                   Yes                 No
## 5                   Yes                 No
## 6                   Yes                 No
##   X3.Month.Score.on.House.Brackmann.scale Full.Recovery.in.3.Months
## 1                                       2                        No
## 2                                       1                       Yes
## 3                                       1                       Yes
## 4                                       1                       Yes
## 5                                       1                       Yes
## 6                                       1                       Yes
##   X9.Month.Score.on.House.Brackmann.scale Full.Recovery.in.9.Months
## 1                                       2                        No
## 2                                       1                       Yes
## 3                                       1                       Yes
## 4                                       1                       Yes
## 5                                       1                       Yes
## 6                                       1                       Yes
# Creating a function that provides the value of the mode
Mode <- function(x) {
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

# Calculating the total number of males and females in the study 
total_females<-sum(BellsPalsyData=="Female")
total_males<-sum(BellsPalsyData=="Male")
total_females
## [1] 238
total_males
## [1] 256
# Calculating the total number of participants in the control group and comparing it to the total number of participants
control_total<-sum(BellsPalsyData$Treatment.Group== "Placebo–Placebo")
control_total
## [1] 122
noncontrol_total<-sum(BellsPalsyData$Treatment.Group!= "Placebo–Placebo")
noncontrol_total
## [1] 372
control_ratio<-(control_total/noncontrol_total)
control_ratio
## [1] 0.327957
# Getting the total number of participants, men and women
sum_total_f<-sum(BellsPalsyData$Sex== "Female")
sum_total_f
## [1] 238
sum_total_m<- sum(BellsPalsyData$Sex== "Male")
sum_total_m
## [1] 256
# Obtaining summary statistics regarding age, grouped by gender
summ_stats_by_sex<-
  BellsPalsyData|>
    group_by(Sex)|>
     summarise(mean_age= mean(Age),
              median_age= median(Age),
              mode_age= Mode(Age),
              min_age= min(Age),
              max_age= max(Age),
              sd_age= sd(Age) )
summ_stats_by_sex
## # A tibble: 2 × 7
##   Sex    mean_age median_age mode_age min_age max_age sd_age
##   <chr>     <dbl>      <dbl>    <int>   <int>   <int>  <dbl>
## 1 Female     45.0         44       34      16      90   14.4
## 2 Male       44.8         44       36      16      89   14.7
# Looking at the total number of males and females
f_sum<-sum(BellsPalsyData$Sex=="Female")
f_sum
## [1] 238
m_sum<-sum(BellsPalsyData$Sex=="Male")
m_sum
## [1] 256
# Looking at the total number of individuals in the placebo group
p_sum<-sum(BellsPalsyData$Treatment.Group=="Placebo–Placebo")
p_sum
## [1] 122
mp_sum<-sum(BellsPalsyData$Treatment.Group=="Placebo–Placebo" & BellsPalsyData$Sex== "Male")
mp_sum
## [1] 57
fp_sum<-sum(BellsPalsyData$Treatment.Group=="Placebo–Placebo" & BellsPalsyData$Sex== "Female")
fp_sum
## [1] 65
# Extracting the placebo group and putting it into a data frame
which(BellsPalsyData$Treatment.Group== "Placebo–Placebo")
##   [1] 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
##  [19] 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408
##  [37] 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
##  [55] 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
##  [73] 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462
##  [91] 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
## [109] 481 482 483 484 485 486 487 488 489 490 491 492 493 494
placebo_group<-BellsPalsyData[BellsPalsyData$Treatment.Group== "Placebo–Placebo", ]
head(placebo_group)
##     Patient.ID    Sex Age Baseline.Score.on.House.Brackmann.scale
## 373        373   Male  59                                       3
## 374        374 Female  43                                       3
## 375        375 Female  65                                       4
## 376        376 Female  48                                       4
## 377        377   Male  49                                       3
## 378        378   Male  60                                       2
##     Time.between.onset.of.symptoms.and.start.of.treatment Treatment.Group
## 373                                         >24 to ≤48 hr Placebo–Placebo
## 374                                          Within 24 hr Placebo–Placebo
## 375                                          Within 24 hr Placebo–Placebo
## 376                                          Within 24 hr Placebo–Placebo
## 377                                         >24 to ≤48 hr Placebo–Placebo
## 378                                          Within 24 hr Placebo–Placebo
##     Received.Prednisolone Received.Acyclovir
## 373                    No                 No
## 374                    No                 No
## 375                    No                 No
## 376                    No                 No
## 377                    No                 No
## 378                    No                 No
##     X3.Month.Score.on.House.Brackmann.scale Full.Recovery.in.3.Months
## 373                                       1                       Yes
## 374                                       1                       Yes
## 375                                       3                        No
## 376                                       2                        No
## 377                                       2                        No
## 378                                       2                        No
##     X9.Month.Score.on.House.Brackmann.scale Full.Recovery.in.9.Months
## 373                                       1                       Yes
## 374                                       1                       Yes
## 375                                       3                        No
## 376                                       1                       Yes
## 377                                       2                        No
## 378                                       2                        No
# Obtaining summary statistics regarding age, grouped by gender
summ_stats_by_sex_and_gender<-
  placebo_group|>
    group_by(Sex)|>
     summarise(mean_age= mean(Age),
              median_age= median(Age),
              mode_age= Mode(Age),
              min_age= min(Age),
              max_age= max(Age),
              sd_age= sd(Age) )
summ_stats_by_sex_and_gender
## # A tibble: 2 × 7
##   Sex    mean_age median_age mode_age min_age max_age sd_age
##   <chr>     <dbl>      <int>    <int>   <int>   <int>  <dbl>
## 1 Female     45.3         45       55      19      90   15.0
## 2 Male       46.2         45       42      16      89   15.9
# Getting the baseline score on the House-Brackmann scale for each gender
mmp_group<-median(placebo_group$Baseline.Score.on.House.Brackmann.scale & placebo_group$Sex== "Male")
mmp_group
## [1] 0
fmp_group<-median(placebo_group$Baseline.Score.on.House.Brackmann.scale & placebo_group$Sex== "Female")
fmp_group
## [1] 1
# Creating an additional age group column that groups all participants into an age category 
BellsPalsyData$agegroup <- case_when(BellsPalsyData$Age >= 0  & BellsPalsyData$Age <= 24 ~ 1,
                                     BellsPalsyData$Age >= 25  & BellsPalsyData$Age <= 49 ~ 2,
                                     BellsPalsyData$Age >= 50  & BellsPalsyData$Age <= 74 ~ 3,
                                     BellsPalsyData$Age >= 75  & BellsPalsyData$Age <= 100 ~ 4)
# Creating a graph that looks at the distribution of age groups in the study 
ggplot(data=BellsPalsyData, aes(x=agegroup))+
  geom_bar(binwidth = 5, color="black", fill="blue")+
  theme_classic()+
  labs(title = "Frequency of Age Groups", x = "Age Group", y = "Frequency")
## Warning in geom_bar(binwidth = 5, color = "black", fill = "blue"): Ignoring
## unknown parameters: `binwidth`

# The House–Brackmann score is a score to grade the degree of nerve damage in a facial nerve palsy
# Looking at the most common time frame between the onset of symptoms and start of treatment
Mode_BP_Data<- Mode(BellsPalsyData$Time.between.onset.of.symptoms.and.start.of.treatment)
Mode_BP_Data
## [1] "Within 24 hr"
# Creating an additional column that looks the difference in recovery from baseline compared to 3 months of treatment
BellsPalsyData['difference_3']= BellsPalsyData$X3.Month.Score.on.House.Brackmann.scale
-BellsPalsyData$Baseline.Score.on.House.Brackmann.scale
##   [1] -6 -6 -4 -3 -3 -4 -4 -4 -4 -4 -4 -2 -3 -4 -4 -4 -2 -3 -4 -2 -3 -4 -2 -4 -4
##  [26] -3 -4 -5 -2 -3 -4 -3 -3 -2 -5 -3 -2 -6 -4 -2 -4 -5 -6 -4 -3 -3 -4 -3 -2 -6
##  [51] -5 -3 -3 -2 -3 -4 -3 -2 -4 -3 -5 -3 -3 -5 -5 -2 -3 -2 -4 -4 -3 -2 -3 -6 -4
##  [76] -4 -6 -2 -3 -5 -4 -3 -3 -4 -3 -3 -2 -3 -4 -4 -3 -3 -5 -5 -4 -4 -2 -4 -2 -2
## [101] -2 -4 -5 -2 -5 -5 -4 -3 -2 -2 -4 -5 -3 -4 -5 -3 -5 -2 -3 -4 -4 -5 -4 -3 -3
## [126] -3 -3 -3 -3 -3 -3 -4 -3 -4 -4 -3 -4 -4 -4 -3 -4 -3 -6 -5 -6 -3 -4 -5 -4 -3
## [151] -5 -4 -2 -3 -5 -5 -2 -4 -5 -4 -4 -2 -2 -5 -4 -2 -3 -3 -5 -4 -6 -4 -5 -3 -4
## [176] -5 -4 -5 -5 -3 -5 -2 -3 -4 -2 -3 -3 -3 -6 -5 -5 -4 -4 -5 -5 -5 -2 -2 -3 -4
## [201] -2 -4 -3 -6 -4 -3 -4 -4 -3 -2 -2 -2 -4 -4 -2 -2 -3 -3 -4 -2 -4 -4 -2 -2 -4
## [226] -3 -4 -3 -4 -3 -5 -4 -3 -2 -5 -2 -2 -3 -2 -4 -5 -4 -3 -2 -6 -3 -5 -3 -5 -2
## [251] -2 -5 -3 -2 -3 -4 -4 -5 -4 -4 -4 -4 -4 -4 -5 -4 -4 -2 -4 -4 -2 -2 -5 -3 -4
## [276] -4 -5 -5 -3 -3 -3 -3 -4 -4 -4 -5 -4 -2 -3 -4 -5 -2 -2 -4 -3 -5 -2 -3 -4 -4
## [301] -3 -6 -4 -4 -3 -4 -3 -2 -4 -2 -5 -2 -6 -4 -6 -5 -2 -4 -2 -4 -2 -4 -5 -3 -4
## [326] -4 -4 -4 -4 -4 -6 -4 -4 -3 -3 -5 -4 -4 -3 -3 -5 -4 -3 -5 -5 -6 -3 -4 -3 -3
## [351] -3 -6 -4 -5 -3 -6 -5 -4 -4 -5 -4 -3 -3 -3 -5 -3 -2 -6 -2 -4 -4 -6 -3 -3 -4
## [376] -4 -3 -2 -3 -2 -3 -3 -3 -2 -4 -2 -2 -3 -4 -2 -2 -2 -2 -4 -3 -4 -3 -3 -6 -2
## [401] -3 -2 -4 -6 -5 -5 -4 -6 -3 -4 -4 -5 -4 -3 -4 -4 -4 -5 -6 -4 -6 -4 -4 -5 -4
## [426] -4 -5 -4 -4 -4 -3 -2 -5 -5 -4 -3 -6 -4 -3 -3 -4 -3 -3 -4 -4 -3 -4 -3 -3 -2
## [451] -5 -2 -6 -4 -4 -3 -5 -3 -4 -2 -3 -5 -3 -5 -3 -6 -4 -3 -5 -6 -2 -5 -2 -6 -4
## [476] -5 -3 -5 -4 -2 -2 -4 -3 -6 -5 -3 -4 -3 -4 -5 -3 -3 -4 -5
# Creating a graphical representation of the initial score on the House Brackmann scale
ggplot(data=BellsPalsyData, aes(x=Baseline.Score.on.House.Brackmann.scale
))+
  geom_bar(binwidth = 10, color="black", fill="red")+
  theme_classic()+
  labs(title = "House Brackmann Score Initial Score", x = "House Brackmann Score", y = "Frequency")
## Warning in geom_bar(binwidth = 10, color = "black", fill = "red"): Ignoring
## unknown parameters: `binwidth`

# Creating a graphical representation of the difference between the ending score on the House Brackmann scale and the starting score
ggplot(data=BellsPalsyData, aes(x=difference_3))+
  geom_bar(binwidth = 10, color="black", fill="green")+
  theme_classic()+
  labs(title = "House Brackmann Score 3 Months After Initial Score", x = "House Brackmann Score", y = "Frequency")
## Warning in geom_bar(binwidth = 10, color = "black", fill = "green"): Ignoring
## unknown parameters: `binwidth`

# Creating an additional column that looks the difference in recovery from baseline compared to 9 months of treatment
BellsPalsyData['difference_3']= BellsPalsyData$X9.Month.Score.on.House.Brackmann.scale
-BellsPalsyData$Baseline.Score.on.House.Brackmann.scale
##   [1] -6 -6 -4 -3 -3 -4 -4 -4 -4 -4 -4 -2 -3 -4 -4 -4 -2 -3 -4 -2 -3 -4 -2 -4 -4
##  [26] -3 -4 -5 -2 -3 -4 -3 -3 -2 -5 -3 -2 -6 -4 -2 -4 -5 -6 -4 -3 -3 -4 -3 -2 -6
##  [51] -5 -3 -3 -2 -3 -4 -3 -2 -4 -3 -5 -3 -3 -5 -5 -2 -3 -2 -4 -4 -3 -2 -3 -6 -4
##  [76] -4 -6 -2 -3 -5 -4 -3 -3 -4 -3 -3 -2 -3 -4 -4 -3 -3 -5 -5 -4 -4 -2 -4 -2 -2
## [101] -2 -4 -5 -2 -5 -5 -4 -3 -2 -2 -4 -5 -3 -4 -5 -3 -5 -2 -3 -4 -4 -5 -4 -3 -3
## [126] -3 -3 -3 -3 -3 -3 -4 -3 -4 -4 -3 -4 -4 -4 -3 -4 -3 -6 -5 -6 -3 -4 -5 -4 -3
## [151] -5 -4 -2 -3 -5 -5 -2 -4 -5 -4 -4 -2 -2 -5 -4 -2 -3 -3 -5 -4 -6 -4 -5 -3 -4
## [176] -5 -4 -5 -5 -3 -5 -2 -3 -4 -2 -3 -3 -3 -6 -5 -5 -4 -4 -5 -5 -5 -2 -2 -3 -4
## [201] -2 -4 -3 -6 -4 -3 -4 -4 -3 -2 -2 -2 -4 -4 -2 -2 -3 -3 -4 -2 -4 -4 -2 -2 -4
## [226] -3 -4 -3 -4 -3 -5 -4 -3 -2 -5 -2 -2 -3 -2 -4 -5 -4 -3 -2 -6 -3 -5 -3 -5 -2
## [251] -2 -5 -3 -2 -3 -4 -4 -5 -4 -4 -4 -4 -4 -4 -5 -4 -4 -2 -4 -4 -2 -2 -5 -3 -4
## [276] -4 -5 -5 -3 -3 -3 -3 -4 -4 -4 -5 -4 -2 -3 -4 -5 -2 -2 -4 -3 -5 -2 -3 -4 -4
## [301] -3 -6 -4 -4 -3 -4 -3 -2 -4 -2 -5 -2 -6 -4 -6 -5 -2 -4 -2 -4 -2 -4 -5 -3 -4
## [326] -4 -4 -4 -4 -4 -6 -4 -4 -3 -3 -5 -4 -4 -3 -3 -5 -4 -3 -5 -5 -6 -3 -4 -3 -3
## [351] -3 -6 -4 -5 -3 -6 -5 -4 -4 -5 -4 -3 -3 -3 -5 -3 -2 -6 -2 -4 -4 -6 -3 -3 -4
## [376] -4 -3 -2 -3 -2 -3 -3 -3 -2 -4 -2 -2 -3 -4 -2 -2 -2 -2 -4 -3 -4 -3 -3 -6 -2
## [401] -3 -2 -4 -6 -5 -5 -4 -6 -3 -4 -4 -5 -4 -3 -4 -4 -4 -5 -6 -4 -6 -4 -4 -5 -4
## [426] -4 -5 -4 -4 -4 -3 -2 -5 -5 -4 -3 -6 -4 -3 -3 -4 -3 -3 -4 -4 -3 -4 -3 -3 -2
## [451] -5 -2 -6 -4 -4 -3 -5 -3 -4 -2 -3 -5 -3 -5 -3 -6 -4 -3 -5 -6 -2 -5 -2 -6 -4
## [476] -5 -3 -5 -4 -2 -2 -4 -3 -6 -5 -3 -4 -3 -4 -5 -3 -3 -4 -5
# Creating a graphical representation of the difference between the ending score on the House Brackmann scale and the starting score
ggplot(data=BellsPalsyData, aes(x=difference_3))+
  geom_bar(binwidth = 10, color="black", fill="blue")+
  theme_classic()+
  labs(title = "House Brackmann Score 9 Months After Initial Score", x = "House Brackmann Score", y = "Frequency")
## Warning in geom_bar(binwidth = 10, color = "black", fill = "blue"): Ignoring
## unknown parameters: `binwidth`

# Creating a new data frame for side-by-side comparison
comparison_data <- data.frame(
  ScoreType = rep(c("3 Month", "Baseline"), each = nrow(BellsPalsyData)),
  Score = c(BellsPalsyData$X3.Month.Score.on.House.Brackmann.scale,
            BellsPalsyData$Baseline.Score.on.House.Brackmann.scale)
)

# Plotting side-by-side bar graph
ggplot(comparison_data, aes(x = ScoreType, y = Score, fill = ScoreType)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Comparison of Scores After 3 Months",
       x = "Time Point",
       y = "Score on House-Brackmann Scale") +
  scale_fill_manual(values = c("purple", "red")) +
  theme_classic()

# Creating a new data frame for side-by-side comparison
comparison_data2 <- data.frame(
  ScoreType = rep(c("9 Month", "Baseline"), each = nrow(BellsPalsyData)),
  Score = c(BellsPalsyData$X9.Month.Score.on.House.Brackmann.scale,
            BellsPalsyData$Baseline.Score.on.House.Brackmann.scale)
)

# Plotting side-by-side bar graph
ggplot(comparison_data2, aes(x = ScoreType, y = Score, fill = ScoreType)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Comparison of Scores After 9 Months",
       x = "Time Point",
       y = "Score on House-Brackmann Scale") +
  scale_fill_manual(values = c("blue", "green")) + 
  theme_classic()

# Getting summary statistics by treatment group 
initial_summ_stats_by_treatment_group<-as.data.frame(
  BellsPalsyData|>
    group_by(Treatment.Group)|>
     summarise(mean_bs= mean(Baseline.Score.on.House.Brackmann.scale),
              median_bs= median(Baseline.Score.on.House.Brackmann.scale),
              mode_bs= Mode(Baseline.Score.on.House.Brackmann.scale),
              min_bs= min(Baseline.Score.on.House.Brackmann.scale),
              max_bs= max(Baseline.Score.on.House.Brackmann.scale),
              sd_bs= sd(Baseline.Score.on.House.Brackmann.scale) ))
initial_summ_stats_by_treatment_group
##          Treatment.Group  mean_bs median_bs mode_bs min_bs max_bs    sd_bs
## 1      Acyclovir–Placebo 3.803279         4       4      2      6 1.103258
## 2 Acyclovir–Prednisolone 3.606299         4       3      2      6 1.127981
## 3        Placebo–Placebo 3.737705         4       4      2      6 1.169893
## 4   Prednisolone–Placebo 3.577236         4       4      2      6 1.123578
nine_summ_stats_by_treatment_group<-as.data.frame(
  BellsPalsyData|>
    group_by(Treatment.Group)|>
     summarise(mean_9s= mean(X9.Month.Score.on.House.Brackmann.scale),
              median_9s= median(X9.Month.Score.on.House.Brackmann.scale),
              mode_9s= Mode(X9.Month.Score.on.House.Brackmann.scale),
              min_9s= min(X9.Month.Score.on.House.Brackmann.scale),
              max_9s= max(X9.Month.Score.on.House.Brackmann.scale),
              sd_9s= sd(X9.Month.Score.on.House.Brackmann.scale) ))
nine_summ_stats_by_treatment_group
##          Treatment.Group  mean_9s median_9s mode_9s min_9s max_9s     sd_9s
## 1      Acyclovir–Placebo 1.213115         1       1      1      4 0.5637758
## 2 Acyclovir–Prednisolone 1.086614         1       1      1      3 0.3338953
## 3        Placebo–Placebo 1.213115         1       1      1      4 0.5782491
## 4   Prednisolone–Placebo 1.065041         1       1      1      3 0.2787512
# Looking at the difference in means across treatment groups from the initial score to the 9 month score 
nine_summ_stats_by_treatment_group$mean_9s-initial_summ_stats_by_treatment_group$mean_bs
## [1] -2.590164 -2.519685 -2.524590 -2.512195
# Seeing if there is a correlation between baseline scores and 3 month, and 9 month scores by treatment group and age group
BellsPalsyData|>
  group_by(Treatment.Group, agegroup)|>
  summarise(cor(Baseline.Score.on.House.Brackmann.scale, X3.Month.Score.on.House.Brackmann.scale),
            cor(Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale),)
## Warning: There were 5 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `cor(Baseline.Score.on.House.Brackmann.scale,
##   X3.Month.Score.on.House.Brackmann.scale)`.
## ℹ In group 5: `Treatment.Group = "Acyclovir–Prednisolone"` and `agegroup = 1`.
## Caused by warning in `cor()`:
## ! the standard deviation is zero
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 4 remaining warnings.
## `summarise()` has grouped output by 'Treatment.Group'. You can override using
## the `.groups` argument.
## # A tibble: 16 × 4
## # Groups:   Treatment.Group [4]
##    Treatment.Group        agegroup cor(Baseline.Score.o…¹ cor(Baseline.Score.o…²
##    <chr>                     <dbl>                  <dbl>                  <dbl>
##  1 Acyclovir–Placebo             1                 0.415                 0.248  
##  2 Acyclovir–Placebo             2                 0.109                 0.0528 
##  3 Acyclovir–Placebo             3                 0.364                 0.434  
##  4 Acyclovir–Placebo             4                 0.816                 0.707  
##  5 Acyclovir–Prednisolone        1                NA                    NA      
##  6 Acyclovir–Prednisolone        2                 0.0103                0.00649
##  7 Acyclovir–Prednisolone        3                 0.258                 0.183  
##  8 Acyclovir–Prednisolone        4                NA                    NA      
##  9 Placebo–Placebo               1                 0.304                NA      
## 10 Placebo–Placebo               2                 0.219                 0.181  
## 11 Placebo–Placebo               3                 0.167                -0.0688 
## 12 Placebo–Placebo               4                 0.732                 0.642  
## 13 Prednisolone–Placebo          1                NA                    NA      
## 14 Prednisolone–Placebo          2                 0.0364                0.0294 
## 15 Prednisolone–Placebo          3                -0.0836               -0.0845 
## 16 Prednisolone–Placebo          4                 0.189                 0.945  
## # ℹ abbreviated names:
## #   ¹​`cor(Baseline.Score.on.House.Brackmann.scale, X3.Month.Score.on.House.Brackmann.scale)`,
## #   ²​`cor(Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale)`
# Seeing if there is a correlation between baseline scores and 3 month, and 9 month scores by treatment group and sex
BellsPalsyData|>
  group_by(Treatment.Group, Sex)|>
  summarise(cor(Baseline.Score.on.House.Brackmann.scale, X3.Month.Score.on.House.Brackmann.scale),
            cor(Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale),)
## `summarise()` has grouped output by 'Treatment.Group'. You can override using
## the `.groups` argument.
## # A tibble: 8 × 4
## # Groups:   Treatment.Group [4]
##   Treatment.Group        Sex    cor(Baseline.Score.on.H…¹ cor(Baseline.Score.o…²
##   <chr>                  <chr>                      <dbl>                  <dbl>
## 1 Acyclovir–Placebo      Female                    0.278                  0.332 
## 2 Acyclovir–Placebo      Male                      0.372                  0.348 
## 3 Acyclovir–Prednisolone Female                    0.117                  0.0365
## 4 Acyclovir–Prednisolone Male                      0.239                  0.153 
## 5 Placebo–Placebo        Female                    0.283                  0.140 
## 6 Placebo–Placebo        Male                      0.272                  0.209 
## 7 Prednisolone–Placebo   Female                    0.0983                 0.320 
## 8 Prednisolone–Placebo   Male                     -0.0369                -0.0958
## # ℹ abbreviated names:
## #   ¹​`cor(Baseline.Score.on.House.Brackmann.scale, X3.Month.Score.on.House.Brackmann.scale)`,
## #   ²​`cor(Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale)`
# Visualizing correlation between baseline and 9-month House Brackmann scores
ggplot(data = BellsPalsyData, aes(x = Baseline.Score.on.House.Brackmann.scale, y = X9.Month.Score.on.House.Brackmann.scale)) + 
  geom_jitter() + 
  labs(title = "Baseline and Nine Month Scores",
       x= "Baseline Score",
       y= "Nine Month Score")+
  theme_classic()

# Only looking at placebo as that is my target population
placebo_group<-BellsPalsyData[BellsPalsyData$Treatment.Group== "Placebo–Placebo", ]

# Grouping by the variable "Sex" and fitting linear models
model1 <- placebo_group |>
  group_by(Sex) |>
  do(model_summary = summary(lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .)))

# Printing summaries
print(model1$model_summary)
## [[1]]
## 
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, 
##     data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.36825 -0.23306 -0.16547 -0.09788  2.69934 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              0.96269    0.23712   4.060 0.000138
## Baseline.Score.on.House.Brackmann.scale  0.06759    0.06024   1.122 0.266129
##                                            
## (Intercept)                             ***
## Baseline.Score.on.House.Brackmann.scale    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5981 on 63 degrees of freedom
## Multiple R-squared:  0.01959,    Adjusted R-squared:  0.004028 
## F-statistic: 1.259 on 1 and 63 DF,  p-value: 0.2661
## 
## 
## [[2]]
## 
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, 
##     data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45212 -0.23862 -0.13187 -0.02512  2.54788 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                              0.81162    0.26192   3.099  0.00306 **
## Baseline.Score.on.House.Brackmann.scale  0.10675    0.06731   1.586  0.11850   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5512 on 55 degrees of freedom
## Multiple R-squared:  0.04373,    Adjusted R-squared:  0.02634 
## F-statistic: 2.515 on 1 and 55 DF,  p-value: 0.1185
# Fitting linear models for each group
model1 <- placebo_group |>
  group_by(Sex) |>
  do(model = lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .))

# Extracting  coefficients for each model
coefficients_df <- model1 |>
  summarise(intercept = coef(model)[1], slope = coef(model)[2])

# Fit linear models for each group
model1 <- placebo_group |>
  group_by(Sex) |>
  do(model = lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .))

# Creating a jitter plot with line of best fit
ggplot(placebo_group, aes(x = Baseline.Score.on.House.Brackmann.scale, y = X9.Month.Score.on.House.Brackmann.scale, color = Sex)) +
  geom_jitter(position = position_jitter(width = 0.1), alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Baseline Score on House-Brackmann scale", y = "9 Month Score on House-Brackmann scale", color = "Sex") +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

# Extracting residuals for each group
residuals_df <- model1 |>
  summarise(residuals = resid(model))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Creatig a QQ plot of residuals
qqnorm(residuals_df$residuals)
qqline(residuals_df$residuals)

# Creating a histogram of the residuals
hist(residuals_df$residuals, breaks = 10, main = "Histogram of Residuals", xlab = "Residuals", col = "lightblue")
title(main = "Histogram of Residuals", col.main = "blue")
title(xlab = "Residuals", col.lab = "blue")
title(ylab = "Frequency", col.lab = "blue")

# Grouping by the variable age group to fit the linear models
model2 <- placebo_group |>
  group_by(agegroup) |>
  do(model_summary = summary(lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .)))
## Warning in summary.lm(lm(X9.Month.Score.on.House.Brackmann.scale ~
## Baseline.Score.on.House.Brackmann.scale, : essentially perfect fit: summary may
## be unreliable
# Printing summaries
print(model2$model_summary)
## [[1]]
## 
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, 
##     data = .)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##      0      0      0      0      0 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                    1          0     Inf   <2e-16
## Baseline.Score.on.House.Brackmann.scale        0          0     NaN      NaN
##                                            
## (Intercept)                             ***
## Baseline.Score.on.House.Brackmann.scale    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0 on 8 degrees of freedom
## Multiple R-squared:    NaN,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 1 and 8 DF,  p-value: NA
## 
## 
## [[2]]
## 
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, 
##     data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.31920 -0.16636 -0.08995 -0.08995  2.75722 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              0.86069    0.20229   4.255 6.86e-05
## Baseline.Score.on.House.Brackmann.scale  0.07642    0.05142   1.486    0.142
##                                            
## (Intercept)                             ***
## Baseline.Score.on.House.Brackmann.scale    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4646 on 65 degrees of freedom
## Multiple R-squared:  0.03286,    Adjusted R-squared:  0.01798 
## F-statistic: 2.209 on 1 and 65 DF,  p-value: 0.1421
## 
## 
## [[3]]
## 
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, 
##     data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3680 -0.3291 -0.2903 -0.2126  1.7097 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              1.44563    0.35781   4.040 0.000251
## Baseline.Score.on.House.Brackmann.scale -0.03883    0.09133  -0.425 0.673065
##                                            
## (Intercept)                             ***
## Baseline.Score.on.House.Brackmann.scale    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6554 on 38 degrees of freedom
## Multiple R-squared:  0.004736,   Adjusted R-squared:  -0.02146 
## F-statistic: 0.1808 on 1 and 38 DF,  p-value: 0.6731
## 
## 
## [[4]]
## 
## Call:
## lm(formula = X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, 
##     data = .)
## 
## Residuals:
##    1    2    3    4    5 
##  0.5 -1.0 -0.5 -0.5  1.5 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                               -0.500      1.669  -0.300    0.784
## Baseline.Score.on.House.Brackmann.scale    0.500      0.345   1.449    0.243
## 
## Residual standard error: 1.155 on 3 degrees of freedom
## Multiple R-squared:  0.4118, Adjusted R-squared:  0.2157 
## F-statistic:   2.1 on 1 and 3 DF,  p-value: 0.2432
# Fitting the linear models for each group
model2 <- placebo_group |>
  group_by(agegroup) |>
  do(model = lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .))

# Creating a Jitter plot with line of best fit
ggplot(placebo_group, aes(x = Baseline.Score.on.House.Brackmann.scale, y = X9.Month.Score.on.House.Brackmann.scale, color = agegroup)) +
  geom_jitter(position = position_jitter(width = 0.1), alpha = 0.5) +
  geom_smooth(method = "lm", aes(group = agegroup), se = FALSE, data = placebo_group) +
  labs(x = "Baseline Score on House-Brackmann scale", y = "X9 Month Score on House-Brackmann scale", color = "Age Group") +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

# Fitting the linear models for each group
model2 <- placebo_group |>
  group_by(agegroup) |>
  do(model = lm(X9.Month.Score.on.House.Brackmann.scale ~ Baseline.Score.on.House.Brackmann.scale, data = .))

# Extracting residuals for each group
residuals_df <- model2 |>
  summarise(residuals = resid(model))
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
##   always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Creating a QQ plot of residuals
qqnorm(residuals_df$residuals)
qqline(residuals_df$residuals)

# Creating a histogram of the residuals with intervals of 0.5
hist(residuals_df$residuals, breaks = seq(min(residuals_df$residuals), max(residuals_df$residuals) + 0.5, by = 0.5), 
     main = "Histogram of Residuals", xlab = "Residuals", col = "lightgreen")
title(main = "Histogram of Residuals", col.main = "green")
title(xlab = "Residuals", col.lab = "green")
title(ylab = "Frequency", col.lab = "green")

# Checking for outliers in baseline and 9-month scores
outlier_detection <- BellsPalsyData %>%
  select(Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale) %>%
  gather(key = "time_point", value = "score", Baseline.Score.on.House.Brackmann.scale, X9.Month.Score.on.House.Brackmann.scale) %>%
  ggplot(aes(x = score)) +
  geom_boxplot() +
  facet_wrap(~time_point, scales = "free") +
  labs(x = "Score", y = "Frequency", title = "Outlier Detection") +
  theme_classic()

outlier_detection