1. Table of Contents
This document presents a non-exhaustive list of treatment comparison tests between two groups with a numeric response variable, using various helpful packages in R.
1.1 Sample Data
The DBP dataset from the book Clinical Trial Data Analysis Using R and SAS was used for this illustrated example.
Preliminary dataset assessment:
[A] 40 Rows (observations)
[B] 8 Columns (variables), excluding the Subject identifier column
[B.1] 1/8 Factor variable = TRT (factor)
[B.1.1] Placebo = placebo drug treatment
[B.1.2] New = new drug treatment
[B.2] 5/8 Response variables = DBP1, DBP2, DBP3, DBP4, DBP5 (numeric)
[B.2.1] DBP1 = diastolic blood pressure baseline measurement made on the 1st month
[B.2.2] DBP2 = diastolic blood pressure measurement made on the 2nd month
[B.2.3] DBP3 = diastolic blood pressure measurement made on the 3rd month
[B.2.4] DBP4 = diastolic blood pressure measurement made on the 4th month
[B.2.5] DBP5 = diastolic blood pressure measurement made on the 5th month
[B.3] 1/8 Covariate variable = Age (numeric)
[B.4] 1/8 Covariate variable = Sex (factor)
[B.4.1] M = male study participant
[B.4.2] F = female study participant
## [1] 40 9
## 'data.frame': 40 obs. of 9 variables:
## $ Subject: int 1 2 3 4 5 6 7 8 9 10 ...
## $ TRT : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
## $ DBP1 : int 114 116 119 115 116 117 118 120 114 115 ...
## $ DBP2 : int 115 113 115 113 112 112 111 115 112 113 ...
## $ DBP3 : int 113 112 113 112 107 113 100 113 113 108 ...
## $ DBP4 : int 109 103 104 109 104 104 109 102 109 106 ...
## $ DBP5 : int 105 101 98 101 105 102 99 102 103 97 ...
## $ Age : int 43 51 48 42 49 47 50 61 43 51 ...
## $ Sex : Factor w/ 2 levels "F","M": 1 2 1 1 2 2 1 2 2 2 ...
## Subject TRT DBP1 DBP2 DBP3
## Min. : 1.00 A:20 Min. :114.0 Min. :111.0 Min. :100.0
## 1st Qu.:10.75 B:20 1st Qu.:115.0 1st Qu.:113.0 1st Qu.:112.0
## Median :20.50 Median :116.5 Median :115.0 Median :113.0
## Mean :20.50 Mean :116.7 Mean :114.3 Mean :112.4
## 3rd Qu.:30.25 3rd Qu.:118.0 3rd Qu.:115.0 3rd Qu.:113.0
## Max. :40.00 Max. :121.0 Max. :119.0 Max. :118.0
## DBP4 DBP5 Age Sex
## Min. :102.0 Min. : 97.0 Min. :38.00 F:18
## 1st Qu.:106.8 1st Qu.:101.8 1st Qu.:42.00 M:22
## Median :109.0 Median :106.5 Median :48.00
## Mean :109.3 Mean :106.7 Mean :47.83
## 3rd Qu.:113.2 3rd Qu.:112.0 3rd Qu.:51.25
## Max. :117.0 Max. :115.0 Max. :63.00
##################################
# Renaming the treatment categories to more verbose labels
# (code "A" becomes "New", any other non-missing code becomes "Placebo")
# and setting the level order so that "Placebo" is the first level
##################################
DBP.Complete$TRT <- factor(
  ifelse(DBP.Complete$TRT == "A", "New", "Placebo"),
  levels = c("Placebo", "New")
)
##################################
# Creating an analysis dataset without the subject information column
# (the first column of the complete dataset)
##################################
DBP.Analysis <- DBP.Complete[, -1]
##################################
# Creating a response-covariate dataset without the treatment column
# (the first column of the analysis dataset)
##################################
DBP.Response.Covariate <- DBP.Analysis[, -1]
##################################
# Formulating a data type assessment summary
# (one row per column: position, name and primary class)
##################################
PDA <- DBP.Response.Covariate
(PDA.Summary <- data.frame(
  # seq_along() stays empty for a zero-column input, unlike 1:length()
  Column.Index = seq_along(PDA),
  Column.Name = names(PDA),
  # class(x)[1] guarantees exactly one label per column, even for
  # multi-class columns; vapply() enforces a character result
  Column.Type = vapply(PDA, function(x) class(x)[1], character(1)),
  row.names = NULL
))
## Column.Index Column.Name Column.Type
## 1 1 DBP1 integer
## 2 2 DBP2 integer
## 3 3 DBP3 integer
## 4 4 DBP4 integer
## 5 5 DBP5 integer
## 6 6 Age integer
## 7 7 Sex factor
1.2 Data Quality Assessment
Data quality assessment:
[A] No missing observations noted for any response and covariate variable.
[B] No low variance (Unique.Count.Ratio<0.01 or First.Second.Mode.Ratio>5) noted for any response variable.
[C] No high skewness (Skewness>3 or Skewness<(-3)) noted for any response variable.
##################################
# Restoring the working directory
# NOTE(review): hard-coded, machine-specific absolute path; kept unchanged
# because other chunks may depend on it - consider a project-relative path
##################################
setwd("C:/Users/User/Desktop/RMarkdown Projects")
##################################
# Reusing the response-covariate dataset
##################################
DQA <- DBP.Response.Covariate
##################################
# Formulating a data subset of response variables (numeric)
# drop = FALSE keeps a data frame even if only one column matches
##################################
DQA.Variables.Response <- DQA[, names(DQA) %in% c("DBP1", "DBP2", "DBP3", "DBP4", "DBP5"), drop = FALSE]
##################################
# Formulating a data subset of covariate variables (numeric)
# (extracted as a plain vector)
##################################
DQA.Variables.Covariate.Numeric <- DQA$Age
##################################
# Formulating a data subset of covariate variables (factor)
# (extracted as a plain vector)
##################################
DQA.Variables.Covariate.Factor <- DQA$Sex
##################################
# Formulating an overall data quality assessment summary
# (one row per column: position, name, primary class, row count,
# number of missing values and proportion of non-missing values)
##################################
(DQA.Summary <- data.frame(
  # seq_along() stays empty for a zero-column input, unlike 1:length()
  Column.Index = seq_along(DQA),
  Column.Name = names(DQA),
  # class(x)[1] guarantees exactly one label per column
  Column.Type = vapply(DQA, function(x) class(x)[1], character(1)),
  # the row count is the same for every column
  Row.Count = rep(nrow(DQA), length(DQA)),
  NA.Count = vapply(DQA, function(x) sum(is.na(x)), integer(1)),
  # formatted to exactly three decimal places, one column at a time
  Fill.Rate = vapply(
    DQA,
    function(x) format(round(sum(!is.na(x)) / nrow(DQA), 3), nsmall = 3),
    character(1)
  ),
  row.names = NULL
))
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 DBP1 integer 40 0 1.000
## 2 2 DBP2 integer 40 0 1.000
## 3 3 DBP3 integer 40 0 1.000
## 4 4 DBP4 integer 40 0 1.000
## 5 5 DBP5 integer 40 0 1.000
## 6 6 Age integer 40 0 1.000
## 7 7 Sex factor 40 0 1.000
## [1] "No missing observations noted."
##################################
# Formulating a data quality assessment summary for response variables (numeric)
# The summary is only built when at least one response column is present
##################################
if (length(names(DQA.Variables.Response)) > 0) {
  ##################################
  # Formulating a function to determine the first mode
  # Returns every distinct non-missing value of x tied for the highest count
  ##################################
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }
  ##################################
  # Formulating a function to determine the second mode
  # Returns the most frequent value(s) once all first-mode values are removed;
  # the result is empty when nothing remains
  ##################################
  SecondModes <- function(x) {
    observed <- na.omit(x)
    remaining <- observed[!(observed %in% FirstModes(x))]
    candidates <- unique(remaining)
    tallies <- tabulate(match(remaining, candidates))
    candidates[tallies == max(tallies)]
  }
  ##################################
  # Building the summary; the helpers below live inside local() so that only
  # the summary itself is added to the workspace
  ##################################
  (DQA.Variables.Response.Summary <- local({
    responses <- DQA.Variables.Response
    # Rounds to three decimals and prints exactly three decimal places
    fmt3 <- function(value) format(round(value, 3), nsmall = 3)
    # Applies a statistic to every column and formats each result separately
    fmt_stat <- function(stat, ...) {
      vapply(responses, function(x) fmt3(stat(x, ...)), character(1))
    }
    # Counts the non-missing values equal to the first reported mode
    count_of <- function(modes) {
      vapply(responses, function(x) sum(na.omit(x) == modes(x)[1]), integer(1))
    }
    # Computed once and reused for the count and ratio columns
    first_count <- count_of(FirstModes)
    second_count <- count_of(SecondModes)
    data.frame(
      Column.Name = names(responses),
      # class(x)[1] guarantees exactly one label per column
      Column.Type = vapply(responses, function(x) class(x)[1], character(1)),
      Unique.Count = vapply(responses, function(x) length(unique(x)), integer(1)),
      Unique.Count.Ratio = fmt_stat(function(x) length(unique(x)) / nrow(responses)),
      First.Mode.Value = fmt_stat(function(x) FirstModes(x)[1]),
      Second.Mode.Value = fmt_stat(function(x) SecondModes(x)[1]),
      First.Mode.Count = first_count,
      Second.Mode.Count = second_count,
      # formatted element by element so each ratio keeps its own width
      First.Second.Mode.Ratio = vapply(first_count / second_count, fmt3, character(1)),
      Minimum = fmt_stat(min, na.rm = TRUE),
      Mean = fmt_stat(mean, na.rm = TRUE),
      Median = fmt_stat(median, na.rm = TRUE),
      Maximum = fmt_stat(max, na.rm = TRUE),
      Skewness = fmt_stat(skewness, na.rm = TRUE),
      Kurtosis = fmt_stat(kurtosis, na.rm = TRUE),
      Percentile25th = fmt_stat(quantile, probs = 0.25, na.rm = TRUE),
      Percentile75th = fmt_stat(quantile, probs = 0.75, na.rm = TRUE),
      row.names = NULL
    )
  }))
}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 1 DBP1 integer 8 0.200 114.000
## 2 DBP2 integer 8 0.200 115.000
## 3 DBP3 integer 13 0.325 113.000
## 4 DBP4 integer 13 0.325 109.000
## 5 DBP5 integer 16 0.400 115.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 1 116.000 9 8 1.125
## 2 113.000 17 6 2.833
## 3 114.000 19 4 4.750
## 4 114.000 12 8 1.500
## 5 102.000 6 5 1.200
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th
## 1 114.000 116.650 116.500 121.000 0.244 2.074 115.000
## 2 111.000 114.350 115.000 119.000 0.357 3.139 113.000
## 3 100.000 112.375 113.000 118.000 -1.676 8.305 112.000
## 4 102.000 109.350 109.000 117.000 -0.077 2.114 106.750
## 5 97.000 106.650 106.500 115.000 0.053 1.552 101.750
## Percentile75th
## 1 118.000
## 2 115.000
## 3 113.000
## 4 113.250
## 5 112.000
## [1] "No low variance numeric predictors due to high first-second mode ratio noted."
## [1] "No low variance numeric predictors due to low unique count ratio noted."
## [1] "No skewed response variables noted."
1.3 Research Question
Using the DBP dataset, treatment comparison tests will be conducted to investigate the following :
[A] Research Question : Do changes in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) differ between treatment groups New and Placebo?
[A.1] Factor variable = TRT
[A.1.1] Placebo = placebo drug treatment
[A.1.2] New = new drug treatment
[A.2] Response variable = DBP5-DBP1
1.4 Sample Size and Power Computation
The DBP dataset used 20 study subjects for each treatment group, TRT=New (new drug) and TRT=Placebo (placebo drug). The following parameters might have been used to determine the appropriate sample size prior to the study :
[A (Mean of Treatment Group)] Mean change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) for the treatment group TRT=New (new drug) was hypothesized to be -5.0 (indicating a decrease in diastolic blood pressure).
[B (Mean of Control Group)] Mean change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) for the treatment group TRT=Placebo (placebo drug) was hypothesized to be 0.0 (indicating no change in diastolic blood pressure).
[C (Measurement Standard Deviation)] Standard deviation of the change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) was hypothesized to be 5.0.
[D (Effect Size)] Effect size using Cohen’s D was hypothesized to be 1.0 (large magnitude).
[E (Type I Error = Alpha)] Type I error (difference between treatment groups is concluded when there is no actual difference) was hypothesized to be 5%.
[F (Type II Error = Beta)] Type II error (no difference between treatment groups is concluded when there is an actual difference) was hypothesized to be 20%. Power of the test was hypothesized to be 80%.
##################################
# Determining sample sizes for a two-group comparison of means
# across a range of hypothesized treatment effects
##################################
##################################
# Type I error (alpha), held fixed
##################################
alpha <- 0.05
##################################
# Range of hypothesized Type II errors (beta)
##################################
beta <- c(0.30, 0.20, 0.10)
##################################
# Standard deviation, held fixed
##################################
sd <- 5
##################################
# Sample size ratio between the treatment and control groups, held fixed
##################################
kappa <- 1
##################################
# Control mean, held fixed
##################################
mu_Control <- 0
##################################
# Range of hypothesized treatment means: every whole number from -15 to -3
##################################
mu_Treatment <- as.numeric(-15:-3)
##################################
# Computing (and printing) the sample size for each hypothesized
# treatment mean using the first Type II error value
##################################
beta_1 <- beta[1]
(nB_Beta30 <- (1 + 1 / kappa) *
  (sd * (qnorm(1 - alpha / 2) + qnorm(1 - beta_1)) / (mu_Treatment - mu_Control))^2)
## [1] 1.371570 1.574507 1.826055 2.143079 2.550441 3.086033 3.809918
## [8] 4.821927 6.298028 8.572315 12.344134 19.287709 34.289261
## [1] 2 2 2 3 3 4 4 5 7 9 13 20 35
## [1] 0.7000044 0.7000044 0.7000044 0.7000044 0.7000044 0.7000044 0.7000044
## [8] 0.7000044 0.7000044 0.7000044 0.7000044 0.7000044 0.7000044
## [1] 1.744195 2.002265 2.322154 2.725305 3.243339 3.924440 4.844987
## [8] 6.131937 8.009061 10.901222 15.697759 24.527749 43.604887
## [1] 2 3 3 3 4 4 5 7 9 11 16 25 44
## [1] 0.800001 0.800001 0.800001 0.800001 0.800001 0.800001 0.800001 0.800001
## [9] 0.800001 0.800001 0.800001 0.800001 0.800001
## [1] 2.334983 2.680465 3.108705 3.648411 4.341910 5.253712 6.486064
## [8] 8.208924 10.721860 14.593643 21.014846 32.835697 58.374573
## [1] 3 3 4 4 5 6 7 9 11 15 22 33 59
## [1] 0.9000001 0.9000001 0.9000001 0.9000001 0.9000001 0.9000001 0.9000001
## [8] 0.9000001 0.9000001 0.9000001 0.9000001 0.9000001 0.9000001
##################################
# Collecting the hypothesized treatment means and the three sets of
# computed sample sizes into one data frame with display-ready column names
##################################
SampleSizePowerCurve <- setNames(
  as.data.frame(cbind(mu_Treatment, nB_Beta30, nB_Beta20, nB_Beta10)),
  c("DBP5.DBP1.Difference", "Power=70%", "Power=80%", "Power=90%")
)
##################################
# Restructuring the data
# (the three power columns are stacked into a Power / Sample.Size pair)
##################################
SampleSizePowerCurve.Reshaped <- gather(
  SampleSizePowerCurve,
  key = "Power",
  value = "Sample.Size",
  "Power=70%", "Power=80%", "Power=90%"
)
##################################
# Plotting the sample size and power curve
# (one line per power level; the plot is stored and then displayed)
##################################
DBP5DBP1Difference.SampleSize.LinePlot.ByPower <-
  ggplot(
    SampleSizePowerCurve.Reshaped,
    aes(x = DBP5.DBP1.Difference, y = Sample.Size, colour = Power)
  ) +
  geom_line(size = 1) +
  geom_point(size = 4) +
  theme_bw() +
  scale_color_brewer(palette = "Paired") +
  scale_x_continuous(
    name = "Hypothesized Diastolic Blood Pressure Change (DBP5-DBP1) for Treatment Group",
    limits = c(-15, -3),
    breaks = seq(-15, -3, by = 1)
  ) +
  scale_y_continuous(
    name = "Sample Size for Each Treatment Group",
    limits = c(0, 80),
    breaks = seq(0, 80, by = 10)
  ) +
  theme(
    text = element_text(size = 15),
    plot.title = element_text(color = "black", size = 35, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", face = "bold", size = 20),
    axis.title.y = element_text(color = "black", face = "bold", size = 20),
    axis.text.x = element_text(color = "black", face = "bold", size = 20),
    axis.text.y = element_text(color = "black", face = "bold", hjust = 0.25, size = 20),
    axis.ticks.length = unit(0.25, "cm"),
    legend.position = "top",
    legend.key.size = unit(1, "cm"),
    legend.key.height = unit(1, "cm"),
    legend.key.width = unit(1, "cm"),
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 20, face = "bold")
  ) +
  labs(title = "Sample Size Calculations for Different Power and Effect Size")
DBP5DBP1Difference.SampleSize.LinePlot.ByPower

1.5 Data Exploration
Differential boxplot analysis of the response variable DBP5-DBP1 by factor variable TRT showed that :
[A] A larger decrease in diastolic blood pressure measurements between the 1st and 5th months (DBP5-DBP1) was observed for TRT=New (new drug) as compared to TRT=Placebo (placebo drug).
[B] The change in diastolic blood pressure measurements between the 1st and 5th months (DBP5-DBP1) was observed to follow a generally normal distribution for both TRT=New (new drug) and TRT=Placebo (placebo drug).
[C] Minimal outliers observed for both TRT=New (new drug) and TRT=Placebo (placebo drug).
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex
## 1 New 114 115 113 109 105 43 F
## 2 New 116 113 112 103 101 51 M
## 3 New 119 115 113 104 98 48 F
## 4 New 115 113 112 109 101 42 F
## 5 New 116 112 107 104 105 49 M
## 6 New 117 112 113 104 102 47 M
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
## [1] 1
## [1] -21
##################################
# Performing exploratory data analysis
# Violin plot with an overlaid boxplot of the DBP5-DBP1 change for each
# treatment group; the group means are marked and joined in black
##################################
DBP5DBP1Difference.ViolinBoxplot.ByTreatment <-
  ggplot(DBP.Analysis, aes(x = TRT, y = DBP5.DBP1.Difference, colour = TRT)) +
  geom_violin(scale = "width", trim = FALSE) +
  stat_boxplot(geom = "errorbar", lwd = 2) +
  geom_boxplot(lwd = 2, outlier.size = 3, width = 0.4) +
  theme_bw() +
  scale_fill_manual(values = c("#3259A0", "#FF5050")) +
  scale_color_manual(values = c("#3259A0", "#FF5050")) +
  scale_x_discrete(name = "Treatment (TRT)", limits = c("Placebo", "New")) +
  scale_y_continuous(
    name = "Diastolic Blood Pressure Change (DBP5-DBP1)",
    limits = c(-25, 5),
    breaks = seq(-25, 5, by = 5)
  ) +
  theme(
    text = element_text(size = 15),
    plot.title = element_text(color = "black", size = 35, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", face = "bold", size = 20),
    axis.title.y = element_text(color = "black", face = "bold", size = 20),
    axis.text.x = element_text(color = "black", face = "bold", size = 20),
    axis.text.y = element_text(color = "black", face = "bold", hjust = 0.25, size = 20),
    axis.ticks.length = unit(0.25, "cm"),
    legend.position = "top",
    legend.key.size = unit(1, "cm"),
    legend.key.height = unit(1, "cm"),
    legend.key.width = unit(1, "cm"),
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 20, face = "bold")
  ) +
  # group = 1 lets a single line connect the two group means
  stat_summary(fun = mean, geom = "line", color = "black", size = 2, aes(group = 1)) +
  stat_summary(fun = mean, geom = "point", color = "black", size = 5) +
  labs(title = "Data Exploration Between Treatment Groups")
DBP5DBP1Difference.ViolinBoxplot.ByTreatment

## TRT variable n mean sd median q1 q3 iqr
## 1 Placebo DBP5.DBP1.Difference 20 -4.8 2.419 -5.5 -6.25 -3.75 2.50
## 2 New DBP5.DBP1.Difference 20 -15.2 2.966 -15.0 -17.25 -14.00 3.25
1.6 Evaluation of Important Statistical Assumptions
Evaluating various statistical assumptions showed that :
[A] Assumptions dependent on the nature of the data.
[A.1] Assumption 1: Continuous response variable. Response variable DBP5-DBP1 is numeric. No violation.
[A.2] Assumption 2: Two-level factor variable. Factor variable = TRT contains two levels TRT=New (new drug) and TRT=Placebo (placebo drug). No violation.
[A.3] Assumption 3: Independent observations. Each patient belonged to only one group and there was no relationship noted between patients in each treatment group TRT=New (new drug) and TRT=Placebo (placebo drug). No violation.
[A.4] Assumption 4: Adequate sample size. Experimental size was assumed to have been effectively computed based on the standard deviation, significance, power, and effect size deemed appropriate for the study. No violation.
[B] Assumptions dependent on preliminary testing of the data.
[B.1] Assumption 5: No univariate outliers. While 2 outliers were detected (one in each treatment group, TRT=New (new drug) and TRT=Placebo (placebo drug)), no extreme outliers were observed. No violation.
[B.2] Assumption 6: Univariate normality. From the results of the Shapiro-Wilk’s test, the determined p-values were greater than the significance level 0.05 indicating that the distribution of the data for both treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug) were not significantly different from the normal distribution. The QQ-Plot (which draws the correlation between the given data and the normal distribution) showed all points falling approximately along the reference line for both groups. No violation.
[B.3] Assumption 7: Homogeneity of variance. From the results of the Levene’s test, the determined p-value was greater than the significance level 0.05 indicating that there was no significant difference between the variances of both treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug). No violation.
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference is.outlier
## 1 Placebo 114 115 113 114 115 38 M 1 TRUE
## 2 New 114 115 113 109 105 43 F -9 TRUE
## is.extreme
## 1 FALSE
## 2 FALSE
## TRT variable statistic p
## 1 Placebo DBP5.DBP1.Difference 0.9312024 0.1628639
## 2 New DBP5.DBP1.Difference 0.9740958 0.8378696
##################################
# Formulating the QQ-Plot
# One panel per treatment group, comparing the observed DBP5-DBP1 change
# against the quantiles expected under a normal distribution
##################################
ggqqplot(
  DBP.Analysis,
  x = "DBP5.DBP1.Difference",
  color = "TRT",
  facet.by = "TRT"
) +
  theme_bw() +
  scale_fill_manual(values = c("#3259A0", "#FF5050")) +
  scale_color_manual(values = c("#3259A0", "#FF5050")) +
  scale_y_continuous(name = "Observed (DBP5-DBP1)") +
  scale_x_continuous(name = "Expected Normal (DBP5-DBP1)") +
  theme(
    text = element_text(size = 15),
    plot.title = element_text(color = "black", size = 35, face = "bold", hjust = 0.50),
    strip.text = element_text(color = "black", face = "bold", size = 20),
    axis.title.x = element_text(color = "black", face = "bold", size = 20),
    axis.title.y = element_text(color = "black", face = "bold", size = 20),
    axis.text.x = element_text(color = "black", face = "bold", size = 20),
    axis.text.y = element_text(color = "black", face = "bold", hjust = 0.25, size = 20),
    axis.ticks.length = unit(0.25, "cm"),
    legend.position = "top",
    legend.key.size = unit(1, "cm"),
    legend.key.height = unit(1, "cm"),
    legend.key.width = unit(1, "cm"),
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 20, face = "bold")
  ) +
  labs(title = "QQ-Plot for Normality Assumption Evaluation")

## df1 df2 statistic p
## 1 1 38 0.285 0.596552
1.7 Statistical Tests for Treatment Comparison Between a Single Factor Variable (2-Level) and a Single Response Variable (Numeric)
[A] Student’s T-Test can be applied on the dataset considering that no statistical assumptions have been violated.
[A.1] Assumption 1: Continuous response variable. No violation.
[A.2] Assumption 2: Two-level factor variable. No violation.
[A.3] Assumption 3: Independent observations. No violation.
[A.4] Assumption 4: Adequate sample size. No violation.
[A.5] Assumption 5: No univariate outliers. No violation.
[A.6] Assumption 6: Univariate normality. No violation.
[A.7] Assumption 7: Homogeneity of variance. No violation.
[B] For comparison, Welch T-Test will be applied on the dataset which is the appropriate test in cases when the assumption on homogeneity of variance is violated.
[C] For comparison, Wilcoxon Rank-Sum Test will be applied on the dataset which is the appropriate test in cases when the assumption on univariate normality is violated.
[D] For comparison, Robust Rank-Order Test will be applied on the dataset which is the appropriate test in cases when the assumptions on univariate normality and homogeneity of variance are both violated.
1.7.1 Student’s T-Test
[A] Study Hypothesis.
[A.1] Null Hypothesis. There is no difference in the mean change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug).
[A.2] Alternative Hypothesis. There is a difference in the mean change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug).
[B] Hypothesis Testing Results. The mean change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) were -15.20 for the treatment group TRT=New (new drug) and -4.80 for the treatment group TRT=Placebo (placebo drug). Student’s T-test showed that the difference between both treatment groups was statistically significant, t(38.00) = 12.15, p < 0.0001, n=40, effect size=3.84 (large magnitude). There is sufficient statistical evidence to reject the null hypothesis. Therefore, the mean change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug) were different.
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
## estimate estimate1 estimate2 .y. group1 group2 n1 n2
## 1 10.4 -4.8 -15.2 DBP5.DBP1.Difference Placebo New 20 20
## statistic p df conf.low conf.high method alternative p.signif
## 1 12.1504 1.17e-14 38 8.667242 12.13276 T-test two.sided ****
## .y. group1 group2 effsize n1 n2 magnitude
## 1 DBP5.DBP1.Difference Placebo New 3.765956 20 20 large
##################################
# Summarizing the hypothesis testing results
##################################
# NOTE(review): this object is not referenced by the plot below, and the
# plot's x-axis variable is TRT - confirm that x = "group" is intended
DBP.Analysis.StudentTTest.Statistics <- add_xy_position(DBP.Analysis.StudentTTest, x = "group")
##################################
# Violin plot with an overlaid boxplot of the DBP5-DBP1 change for each
# treatment group, annotated with the Student's T-Test results
# NOTE(review): the subtitle reports Effect Size=3.84 while the effect size
# table echoed for this test shows 3.765956 - confirm which one is intended
##################################
DBP5DBP1Difference.ViolinBoxplot.ByTreatment <-
  ggplot(DBP.Analysis, aes(x = TRT, y = DBP5.DBP1.Difference, colour = TRT)) +
  geom_violin(scale = "width", trim = FALSE) +
  stat_boxplot(geom = "errorbar", lwd = 2) +
  geom_boxplot(lwd = 2, outlier.size = 3, width = 0.4) +
  theme_bw() +
  scale_fill_manual(values = c("#3259A0", "#FF5050")) +
  scale_color_manual(values = c("#3259A0", "#FF5050")) +
  scale_x_discrete(name = "Treatment (TRT)", limits = c("Placebo", "New")) +
  scale_y_continuous(
    name = "Diastolic Blood Pressure Change (DBP5-DBP1)",
    limits = c(-25, 5),
    breaks = seq(-25, 5, by = 5)
  ) +
  theme(
    text = element_text(size = 15),
    plot.title = element_text(color = "black", size = 35, face = "bold", hjust = 0.50),
    plot.subtitle = element_text(color = "black", size = 25, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", face = "bold", size = 20),
    axis.title.y = element_text(color = "black", face = "bold", size = 20),
    axis.text.x = element_text(color = "black", face = "bold", size = 20),
    axis.text.y = element_text(color = "black", face = "bold", hjust = 0.25, size = 20),
    axis.ticks.length = unit(0.25, "cm"),
    legend.position = "top",
    legend.key.size = unit(1, "cm"),
    legend.key.height = unit(1, "cm"),
    legend.key.width = unit(1, "cm"),
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 20, face = "bold")
  ) +
  # group = 1 lets a single line connect the two group means
  stat_summary(fun = mean, geom = "line", color = "black", size = 2, aes(group = 1)) +
  stat_summary(fun = mean, geom = "point", color = "black", size = 5) +
  labs(
    title = "Hypothesis Testing using Student's T-Test",
    subtitle = "t(38.00)=12.15, p=<0.0001, n=40, Effect Size=3.84"
  )
DBP5DBP1Difference.ViolinBoxplot.ByTreatment

1.7.2 Welch T-Test
[A] Study Hypothesis.
[A.1] Null Hypothesis. There is no difference in the mean change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug).
[A.2] Alternative Hypothesis. There is a difference in the mean change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug).
[B] Hypothesis Testing Results. The mean change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) were -15.20 for the treatment group TRT=New (new drug) and -4.80 for the treatment group TRT=Placebo (placebo drug). Welch T-Test (with relaxed assumptions on Homogeneity of variance) showed that the difference between both treatment groups was statistically significant, t(36.52) = 12.15, p < 0.0001, n=40, effect size=3.84 (large magnitude). There is sufficient statistical evidence to reject the null hypothesis. Therefore, the mean change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug) were different.
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
## estimate estimate1 estimate2 .y. group1 group2 n1 n2
## 1 10.4 -4.8 -15.2 DBP5.DBP1.Difference Placebo New 20 20
## statistic p df conf.low conf.high method alternative p.signif
## 1 12.1504 2.15e-14 36.52227 8.664937 12.13506 T-test two.sided ****
## .y. group1 group2 effsize n1 n2 magnitude
## 1 DBP5.DBP1.Difference Placebo New 3.765956 20 20 large
##################################
# Summarizing the hypothesis testing results
##################################
# NOTE(review): this object is not referenced by the plot below, and the
# plot's x-axis variable is TRT - confirm that x = "group" is intended
DBP.Analysis.WelchTTest.Statistics <- add_xy_position(DBP.Analysis.WelchTTest, x = "group")
##################################
# Violin plot with an overlaid boxplot of the DBP5-DBP1 change for each
# treatment group, annotated with the Welch T-Test results
# NOTE(review): the subtitle reports Effect Size=3.84 while the effect size
# table echoed for this test shows 3.765956 - confirm which one is intended
##################################
DBP5DBP1Difference.ViolinBoxplot.ByTreatment <-
  ggplot(DBP.Analysis, aes(x = TRT, y = DBP5.DBP1.Difference, colour = TRT)) +
  geom_violin(scale = "width", trim = FALSE) +
  stat_boxplot(geom = "errorbar", lwd = 2) +
  geom_boxplot(lwd = 2, outlier.size = 3, width = 0.4) +
  theme_bw() +
  scale_fill_manual(values = c("#3259A0", "#FF5050")) +
  scale_color_manual(values = c("#3259A0", "#FF5050")) +
  scale_x_discrete(name = "Treatment (TRT)", limits = c("Placebo", "New")) +
  scale_y_continuous(
    name = "Diastolic Blood Pressure Change (DBP5-DBP1)",
    limits = c(-25, 5),
    breaks = seq(-25, 5, by = 5)
  ) +
  theme(
    text = element_text(size = 15),
    plot.title = element_text(color = "black", size = 35, face = "bold", hjust = 0.50),
    plot.subtitle = element_text(color = "black", size = 25, face = "bold", hjust = 0.50),
    axis.title.x = element_text(color = "black", face = "bold", size = 20),
    axis.title.y = element_text(color = "black", face = "bold", size = 20),
    axis.text.x = element_text(color = "black", face = "bold", size = 20),
    axis.text.y = element_text(color = "black", face = "bold", hjust = 0.25, size = 20),
    axis.ticks.length = unit(0.25, "cm"),
    legend.position = "top",
    legend.key.size = unit(1, "cm"),
    legend.key.height = unit(1, "cm"),
    legend.key.width = unit(1, "cm"),
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 20, face = "bold")
  ) +
  # group = 1 lets a single line connect the two group means
  stat_summary(fun = mean, geom = "line", color = "black", size = 2, aes(group = 1)) +
  stat_summary(fun = mean, geom = "point", color = "black", size = 5) +
  labs(
    title = "Hypothesis Testing using Welch T-Test",
    subtitle = "t(36.52)=12.15, p=<0.0001, n=40, Effect Size=3.84"
  )
DBP5DBP1Difference.ViolinBoxplot.ByTreatment

1.7.3 Wilcoxon Rank-Sum Test
[A] Study Hypothesis.
[A.1] Null Hypothesis. There is no difference in the median change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug).
[A.2] Alternative Hypothesis. There is a difference in the median change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug).
[B] Hypothesis Testing Results. The median change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) were -15.00 for the treatment group TRT=New (new drug) and -5.50 for the treatment group TRT=Placebo (placebo drug). Wilcoxon Rank-Sum Test (with relaxed assumptions on univariate normality) showed that the difference between both treatment groups was statistically significant, W = 400.00, p < 0.0001, n=40, effect size=0.86 (large magnitude). There is sufficient statistical evidence to reject the null hypothesis. Therefore, the median change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug) were different.
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
## estimate .y. group1 group2 n1 n2 statistic p
## 1 10.00008 DBP5.DBP1.Difference Placebo New 20 20 400 6.29e-08
## conf.low conf.high method alternative p.signif
## 1 8.999927 12.00003 Wilcoxon two.sided ****
## .y. group1 group2 effsize n1 n2 magnitude
## 1 DBP5.DBP1.Difference Placebo New 0.8576142 20 20 large
##################################
# Summarizing the hypothesis testing results
##################################
# Pass the Wilcoxon test result through rstatix::add_xy_position() and keep
# the returned table under a separate name.
# NOTE(review): x = "group" is passed here, whereas the plot that follows maps
# TRT to the x axis and does not reference this object - confirm the intended
# x variable and whether this table is used further down the file.
DBP.Analysis.WilcoxonRankSumTest.Statistics <- add_xy_position(
  DBP.Analysis.WilcoxonRankSumTest,
  x = "group"
)
# Violin + boxplot of the DBP5-DBP1 change per treatment group, with the group
# medians overlaid as black points joined by a black line. The outer
# parentheses make the assignment print the plot as well as store it.
(DBP5DBP1Difference.ViolinBoxplot.ByTreatment <-
  ggplot(
    data = DBP.Analysis,
    mapping = aes(x = TRT, y = DBP5.DBP1.Difference, color = TRT)
  ) +
  # Layers, drawn bottom-to-top: violin, whisker caps, boxplot, medians.
  geom_violin(scale = "width", trim = FALSE) +
  stat_boxplot(geom = "errorbar", lwd = 2) +
  geom_boxplot(lwd = 2, outlier.size = 3, width = 0.4) +
  stat_summary(
    mapping = aes(group = 1), # single group so the line joins both medians
    fun = median,
    geom = "line",
    color = "black",
    size = 2
  ) +
  stat_summary(fun = median, geom = "point", color = "black", size = 5) +
  # Scales: fixed two-colour palette, Placebo shown first on the x axis,
  # y axis fixed to -25..5 in steps of 5.
  # No layer maps `fill`, so the fill scale is kept only for parity.
  scale_fill_manual(values = c("#3259A0", "#FF5050")) +
  scale_color_manual(values = c("#3259A0", "#FF5050")) +
  scale_x_discrete(
    name = "Treatment (TRT)",
    limits = c("Placebo", "New")
  ) +
  scale_y_continuous(
    name = "Diastolic Blood Pressure Change (DBP5-DBP1)",
    limits = c(-25, 5),
    breaks = seq(-25, 5, by = 5)
  ) +
  # Title and subtitle; the subtitle hard-codes the reported test results.
  labs(
    title = "Hypothesis Testing using Wilcoxon Rank-Sum Test",
    subtitle = "W=400.00, p=<0.0001, n=40, Effect Size=0.86"
  ) +
  # Base theme first, then the text-size / legend overrides on top of it.
  theme_bw() +
  theme(
    text = element_text(size = 15),
    plot.title = element_text(
      color = "black", size = 35, face = "bold", hjust = 0.5
    ),
    plot.subtitle = element_text(
      color = "black", size = 25, face = "bold", hjust = 0.5
    ),
    axis.title.x = element_text(color = "black", face = "bold", size = 20),
    axis.title.y = element_text(color = "black", face = "bold", size = 20),
    axis.text.x = element_text(color = "black", face = "bold", size = 20),
    axis.text.y = element_text(
      color = "black", face = "bold", hjust = 0.25, size = 20
    ),
    axis.ticks.length = unit(0.25, "cm"),
    legend.position = "top",
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 20, face = "bold"),
    legend.key.size = unit(1, "cm"),
    legend.key.height = unit(1, "cm"),
    legend.key.width = unit(1, "cm")
  ))

1.7.4 Robust Rank-Order Test
[A] Study Hypothesis.
[A.1] Null Hypothesis. There is no difference in the median change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug).
[A.2] Alternative Hypothesis. There is a difference in the median change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug).
[B] Hypothesis Testing Results. The median change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) was -15.00 for the treatment group TRT=New (new drug) and -5.50 for the treatment group TRT=Placebo (placebo drug). The Robust Rank-Order Test (with relaxed assumptions on both univariate normality and homogeneity of variance) showed that the difference between the two treatment groups was statistically significant, z = Inf, p < 0.0001, n=40, effect size=(not determined). The infinite z statistic reflects complete separation of the two groups (every Placebo change exceeds every New change, consistent with W = 400 = 20 × 20 in the Wilcoxon Rank-Sum Test), which makes the variance term of the test statistic zero. There is sufficient statistical evidence to reject the null hypothesis. Therefore, the median change in diastolic blood pressure readings measured between the 1st and the 5th months (DBP5-DBP1) differed between treatment groups TRT=New (new drug) and TRT=Placebo (placebo drug).
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
##
## Robust Rank-Order Distributional Test
##
## data: DBP5.DBP1.Difference by TRT
## z = Inf, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
##################################
# Summarizing the hypothesis testing results
##################################
# Violin + boxplot of the DBP5-DBP1 change per treatment group, with the group
# medians overlaid as black points joined by a black line. The outer
# parentheses make the assignment print the plot as well as store it.
(DBP5DBP1Difference.ViolinBoxplot.ByTreatment <-
  ggplot(
    data = DBP.Analysis,
    mapping = aes(x = TRT, y = DBP5.DBP1.Difference, color = TRT)
  ) +
  # Layers, drawn bottom-to-top: violin, whisker caps, boxplot, medians.
  geom_violin(scale = "width", trim = FALSE) +
  stat_boxplot(geom = "errorbar", lwd = 2) +
  geom_boxplot(lwd = 2, outlier.size = 3, width = 0.4) +
  stat_summary(
    mapping = aes(group = 1), # single group so the line joins both medians
    fun = median,
    geom = "line",
    color = "black",
    size = 2
  ) +
  stat_summary(fun = median, geom = "point", color = "black", size = 5) +
  # Scales: fixed two-colour palette, Placebo shown first on the x axis,
  # y axis fixed to -25..5 in steps of 5.
  # No layer maps `fill`, so the fill scale is kept only for parity.
  scale_fill_manual(values = c("#3259A0", "#FF5050")) +
  scale_color_manual(values = c("#3259A0", "#FF5050")) +
  scale_x_discrete(
    name = "Treatment (TRT)",
    limits = c("Placebo", "New")
  ) +
  scale_y_continuous(
    name = "Diastolic Blood Pressure Change (DBP5-DBP1)",
    limits = c(-25, 5),
    breaks = seq(-25, 5, by = 5)
  ) +
  # Title and subtitle; the subtitle hard-codes the reported test results
  # ("ND" = effect size not determined for this test).
  labs(
    title = "Hypothesis Testing using Robust Rank-Order Test",
    subtitle = "z=Inf, p=<0.0001, n=40, Effect Size=ND"
  ) +
  # Base theme first, then the text-size / legend overrides on top of it.
  theme_bw() +
  theme(
    text = element_text(size = 15),
    plot.title = element_text(
      color = "black", size = 35, face = "bold", hjust = 0.5
    ),
    plot.subtitle = element_text(
      color = "black", size = 25, face = "bold", hjust = 0.5
    ),
    axis.title.x = element_text(color = "black", face = "bold", size = 20),
    axis.title.y = element_text(color = "black", face = "bold", size = 20),
    axis.text.x = element_text(color = "black", face = "bold", size = 20),
    axis.text.y = element_text(
      color = "black", face = "bold", hjust = 0.25, size = 20
    ),
    axis.ticks.length = unit(0.25, "cm"),
    legend.position = "top",
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 20, face = "bold"),
    legend.key.size = unit(1, "cm"),
    legend.key.height = unit(1, "cm"),
    legend.key.width = unit(1, "cm")
  ))
