setwd("C:/Work Files/Collaborations/Kendall PCOS/T and T Project")Trinidad and Tabago Data
Measures:
PPEQ: Picker Patient Experience Questionnaire
PHCPC_F1: Patient-Health Care Provider Communication Scale - Factor 1
PHCPC_F2: Patient-Health Care Provider Communication Scale - Factor 2
HCRT: Health Care Relationship Trust Scale
Perceived Stigma Scale had no data
Packages Used
library(haven) #reading data from spss, sas, stata
library(tidyverse) #Data management── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.0 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 4.0.2 ✔ tibble 3.3.0
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.4
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (corrplot) #correlation tablescorrplot 0.95 loaded
library(psych) ## basic psychometrics and statistics
Attaching package: 'psych'
The following objects are masked from 'package:ggplot2':
%+%, alpha
library(finalfit) #Testing Assumptions
library (performance) #Testing Assumptions
library(MVN) #Assessing multivariate normality
Attaching package: 'MVN'
The following object is masked from 'package:psych':
mardia
library(knitr) #Making Tables
library(kableExtra) #Making Tables
Attaching package: 'kableExtra'
The following object is masked from 'package:dplyr':
group_rows
library(apaTables) #Generating APA formated tables
library(effectsize)
Attaching package: 'effectsize'
The following object is masked from 'package:psych':
phi
library(mice)
Attaching package: 'mice'
The following object is masked from 'package:stats':
filter
The following objects are masked from 'package:base':
cbind, rbind
library(naniar)Data Input
PCOS_Data <-read.csv("MBBS_Group_2_Data.csv")
PCOS_Data <-(PCOS_Data[1:304,])table(PCOS_Data$PCOS_Status)
0 1
155 149
Data Management
PCOS_Data$PPEQ_11 <- as.numeric(PCOS_Data$PPEQ_11)Warning: NAs introduced by coercion
PCOS_Data$Age <-as.numeric(PCOS_Data$Age)
PCOS_Data$Exercise_per_week <-factor(PCOS_Data$Exercise_per_week,
levels = c(1,2,3,4,5,6),
labels = c("1 day", "2 days", "3 days", "4 days", ">5 days", "none"))
PCOS_Data$Type_of_exercise <- factor(PCOS_Data$Type_of_exercise,
levels = c(1,2,3),
labels = c("Mild", "Moderate", "Strenuous"))
PCOS_Data$Hysterectomy <-factor(PCOS_Data$Hysterectomy,
levels = c(1,2),
labels = c("Yes", "No"))
PCOS_Data$Education <- factor(PCOS_Data$Education,
levels = c(1,2,3,4,5),
labels = c("No Education","Primary","Secondary","Tertiary", "Post-Vocational"))
PCOS_Data$Smoking <- factor(PCOS_Data$Smoking,
levels = c(1,2),
labels = c("Yes", "No"))
PCOS_Data$Alcohol <- factor(PCOS_Data$Alcohol,
levels = c(1,2),
labels = c("Yes", "No"))
PCOS_Data$Contraceptive <- factor(PCOS_Data$Contraceptive,
levels = c(1,2),
labels = c("Yes", "No"))
PCOS_Data$Menopause <- factor(PCOS_Data$Menopause,
levels = c(1,2),
labels = c("Yes", "No"))
PCOS_Data$Acne <- factor(PCOS_Data$Acne,
levels = c(1,2),
labels = c("Yes", "No"))
PCOS_Data$Stopped_period <- factor(PCOS_Data$Stopped_period,
levels = c(1,2),
labels = c("Yes", "No"))
PCOS_Data$Alopecia_stage <- factor(PCOS_Data$Alopecia_stage,
levels = c(0,1,2,3),
labels = c("no alopecia", "Stage 1", "Stage 2", "Stage 3"))
PCOS_Data$Insulin_resistance_medications <- factor(PCOS_Data$Insulin_resistance_medications,
levels = c(1,2),
labels = c("Yes", "No"))
PCOS_Data$Conditions <- factor(PCOS_Data$Conditions,
levels = c(1,2,3,4,5,6,7,8,9,10,11,12),
labels = c("T2D","H2P","Heart Cond","PCOS","Constipation","Cancer","T2D & H2P", "T2D,H2P,& Cancer", "T2D,H2P,Heart, & Cancer","T2D & Constipation","T2D & Heart","T2D & H2P"))
PCOS_Data$PCOS_Status <- factor(PCOS_Data$PCOS_Status,
levels = c(0,1),
labels = c("Non-PCOS","PCOS"))
PCOS_Data$Race <- factor(PCOS_Data$Race,
levels = c(1,2,3),
labels = c("African","East Indian","Mixed"))
PCOS_Data$Exercise_per_day <- factor(PCOS_Data$Exercise_per_day,
levels = c(1,2,3,4,5),
labels = c("< 10 minutes","10-19 minutes", "10-39 minutes","40-59 minutes","> 60 minutes"))PPEQ Scoring
###INFO_Score
items <- c("PPEQ_1","PPEQ_2","PPEQ_14","PPEQ_15","PPEQ_16")
PCOS_Data <- PCOS_Data |>
mutate(across(all_of(items), ~ ifelse(. == 4, NA, .)))
PCOS_Data <- PCOS_Data |>
mutate(across(all_of(items), ~ 4 - ., .names = "{.col}_r"))
info_items_r <- c("PPEQ_1_r","PPEQ_2_r","PPEQ_14_r","PPEQ_15_r","PPEQ_16_r")
PCOS_Data <- PCOS_Data |>
mutate(
Info_Score = ifelse(
rowSums(!is.na(pick(all_of(info_items_r)))) >= 3,
rowMeans(pick(all_of(info_items_r)), na.rm = TRUE),
NA
)
)
#### Emotion_Score
emo_items <- c("PPEQ_4","PPEQ_8","PPEQ_9")
PCOS_Data <- PCOS_Data |>
mutate(across(all_of(emo_items), ~ ifelse(. == 4, NA, .)))
PCOS_Data <- PCOS_Data |>
mutate(across(all_of(emo_items), ~ 4 - ., .names = "{.col}_r"))
emo_items_r <- c("PPEQ_4_r","PPEQ_8_r","PPEQ_9_r")
PCOS_Data <- PCOS_Data |>
mutate(
Emo_Score = ifelse(
rowSums(!is.na(pick(all_of(emo_items_r)))) >= 2,
rowMeans(pick(all_of(emo_items_r)), na.rm = TRUE),
NA
)
)
#### Respect Score
respect_items <- c("PPEQ_5", "PPEQ_7")
PCOS_Data <- PCOS_Data |>
mutate(across(all_of(respect_items), ~ ifelse(. == 4, NA, .)))
PCOS_Data <- PCOS_Data |>
mutate(
PPEQ_5_r = 4 - PPEQ_5,
)
PCOS_Data <- PCOS_Data |>
mutate(
Respect_Score = ifelse(
rowSums(!is.na(pick(PPEQ_5_r, PPEQ_7))) >= 1,
rowMeans(pick(PPEQ_5_r, PPEQ_7), na.rm = TRUE),
NA
)
)Patient Healthcare Provider Communication Scale
qc_items <- paste0("PHCPC_F1_", 1:17)
PCOS_Data <- PCOS_Data |>
mutate(across(all_of(qc_items), as.numeric))
PCOS_Data <- PCOS_Data |>
mutate(
Quality_Communication = rowSums(
pick(all_of(qc_items)),
na.rm = TRUE
)
)
nc_items <- paste0("PHCPC_F2_", 1:4)
PCOS_Data <- PCOS_Data |>
mutate(across(all_of(nc_items), as.numeric))
PCOS_Data <- PCOS_Data |>
mutate(
Negative_Communication = rowSums(
pick(all_of(nc_items)),
na.rm = TRUE
)
)
### Overall Communication
range(PCOS_Data$Negative_Communication, na.rm = TRUE)[1] 4 16
PCOS_Data <- PCOS_Data |>
mutate(
Negative_Communication_r = (16 + 4) - Negative_Communication
)
PCOS_Data <- PCOS_Data |>
mutate(
Overall_Communication = Quality_Communication + Negative_Communication_r
)Health Care Provider Trust Scale
neg_items <- c("HCRT_1", "HCRT_11", "HCRT_13")
PCOS_Data <- PCOS_Data |>
mutate(across(all_of(neg_items), ~ 4 - ., .names = "{.col}_r"))
trust_items <- c(
"HCRT_1_r",
"HCRT_2", "HCRT_3", "HCRT_4", "HCRT_5",
"HCRT_6", "HCRT_7", "HCRT_8", "HCRT_9",
"HCRT_10", "HCRT_11_r", "HCRT_12", "HCRT_13_r",
"HCRT_14"
)
PCOS_Data <- PCOS_Data |>
mutate(
trust_valid = rowSums(!is.na(pick(all_of(trust_items)))),
Trust_Score = ifelse(
trust_valid >= 10,
rowMeans(pick(all_of(trust_items)), na.rm = TRUE),
NA
)
) |>
select(-trust_valid)New Data Set with Scored Scales
PCOS_Data_Scored <-write.csv(PCOS_Data,"PCOS_Data_Scored.csv")
PCOS_Data_Scored <-write_sav(PCOS_Data,"PCOS_Data_Scored.sav")Descriptive Statistics
describe(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)]) vars n mean sd median trimmed mad min max
Height 1 303 161.53 10.10 162.60 162.29 7.41 120.00 190
Weight 2 303 69.15 15.46 67.00 67.85 11.86 6.00 143
Hirsutism_score 3 304 5.80 6.16 3.00 5.02 4.45 0.00 26
Age 4 304 29.07 7.39 28.00 28.55 7.41 18.00 45
Emo_Score 5 294 1.97 0.50 2.00 1.96 0.49 1.00 3
Respect_Score 6 304 1.62 0.53 1.50 1.58 0.74 1.00 3
Quality_Communication 7 304 47.23 11.82 47.00 47.62 11.86 17.00 68
Negative_Communication 8 304 7.26 3.06 6.50 6.82 2.22 4.00 16
Overall_Communication 9 304 59.96 14.02 61.00 60.77 13.34 21.00 84
Trust_Score 10 304 2.61 0.84 2.71 2.67 0.85 0.21 4
range skew kurtosis se
Height 70.00 -1.19 3.73 0.58
Weight 137.00 1.12 3.99 0.89
Hirsutism_score 26.00 0.83 -0.38 0.35
Age 27.00 0.52 -0.79 0.42
Emo_Score 2.00 0.16 -0.40 0.03
Respect_Score 2.00 0.36 -0.68 0.03
Quality_Communication 51.00 -0.29 -0.46 0.68
Negative_Communication 12.00 1.13 0.78 0.18
Overall_Communication 63.00 -0.48 -0.19 0.80
Trust_Score 3.79 -0.57 0.08 0.05
describe.by(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)],PCOS_Data_Scored$PCOS_Status)Warning in describe.by(PCOS_Data_Scored[, c(2, 3, 18, 20, 82, 84, 85, 86, :
describe.by is deprecated. Please use the describeBy function
Descriptive statistics by group
group: Non-PCOS
vars n mean sd median trimmed mad min max
Height 1 154 160.83 10.55 162.00 161.80 7.04 120.00 190
Weight 2 155 64.74 11.05 63.00 63.93 8.90 44.50 125
Hirsutism_score 3 155 3.62 4.95 1.00 2.83 1.48 0.00 18
Age 4 155 29.45 7.55 29.00 28.90 8.90 18.00 45
Emo_Score 5 146 2.03 0.46 2.00 2.02 0.49 1.00 3
Respect_Score 6 155 1.65 0.57 1.50 1.60 0.74 1.00 3
Quality_Communication 7 155 47.17 12.42 48.00 47.74 11.86 17.00 68
Negative_Communication 8 155 7.14 3.30 6.00 6.58 2.97 4.00 16
Overall_Communication 9 155 60.03 15.00 61.00 61.14 13.34 21.00 84
Trust_Score 10 155 2.58 0.93 2.71 2.66 0.85 0.21 4
range skew kurtosis se
Height 70.00 -1.16 3.23 0.85
Weight 80.50 1.35 4.65 0.89
Hirsutism_score 18.00 1.14 -0.23 0.40
Age 27.00 0.52 -0.79 0.61
Emo_Score 2.00 0.37 -0.35 0.04
Respect_Score 2.00 0.45 -0.63 0.05
Quality_Communication 51.00 -0.42 -0.40 1.00
Negative_Communication 12.00 1.27 0.87 0.27
Overall_Communication 63.00 -0.65 -0.12 1.20
Trust_Score 3.79 -0.67 -0.15 0.07
------------------------------------------------------------
group: PCOS
vars n mean sd median trimmed mad min max
Height 1 149 162.24 9.59 163.00 162.78 7.41 121.00 190.0
Weight 2 148 73.78 17.91 72.40 72.76 14.68 6.00 143.0
Hirsutism_score 3 149 8.07 6.48 7.00 7.60 8.90 0.00 26.0
Age 4 149 28.68 7.23 27.00 28.22 7.41 18.00 45.0
Emo_Score 5 148 1.91 0.53 2.00 1.90 0.49 1.00 3.0
Respect_Score 6 149 1.59 0.48 1.50 1.57 0.74 1.00 2.5
Quality_Communication 7 149 47.28 11.21 47.00 47.45 11.86 20.00 68.0
Negative_Communication 8 149 7.39 2.78 7.00 7.09 2.97 4.00 16.0
Overall_Communication 9 149 59.89 12.98 59.00 60.22 13.34 29.00 84.0
Trust_Score 10 149 2.64 0.74 2.71 2.66 0.74 0.36 4.0
range skew kurtosis se
Height 69.00 -1.17 4.21 0.79
Weight 137.00 0.67 2.87 1.47
Hirsutism_score 26.00 0.49 -0.76 0.53
Age 27.00 0.50 -0.87 0.59
Emo_Score 2.00 0.11 -0.61 0.04
Respect_Score 1.50 0.11 -1.20 0.04
Quality_Communication 48.00 -0.09 -0.67 0.92
Negative_Communication 12.00 0.91 0.42 0.23
Overall_Communication 55.00 -0.21 -0.52 1.06
Trust_Score 3.64 -0.27 -0.04 0.06
PCOS_Data_Scored_No_NA <- na.omit(PCOS_Data_Scored)
apa.cor.table(PCOS_Data_Scored[,c(2,3,18,20,82,84,85,86,88,92)])
Means, standard deviations, and correlations with confidence intervals
Variable M SD 1 2 3
1. Height 161.53 10.10
2. Weight 69.15 15.46 .27**
[.16, .37]
3. Hirsutism_score 5.80 6.16 .05 .19**
[-.06, .17] [.08, .30]
4. Age 29.07 7.39 .06 .16** -.02
[-.05, .17] [.05, .27] [-.14, .09]
5. Emo_Score 1.97 0.50 .14* -.13* .15*
[.02, .25] [-.24, -.01] [.03, .26]
6. Respect_Score 1.62 0.53 -.11 -.05 -.05
[-.22, .00] [-.17, .06] [-.16, .06]
7. Quality_Communication 47.23 11.82 .17** -.04 .11*
[.06, .28] [-.15, .07] [.00, .22]
8. Negative_Communication 7.26 3.06 -.13* .04 -.16**
[-.24, -.02] [-.08, .15] [-.27, -.05]
9. Overall_Communication 59.96 14.02 .18** -.04 .13*
[.06, .28] [-.15, .07] [.02, .24]
10. Trust_Score 2.61 0.84 .15** -.04 .19**
[.04, .26] [-.15, .08] [.08, .30]
4 5 6 7 8 9
.03
[-.09, .14]
.15** -.21**
[.04, .26] [-.32, -.10]
-.08 .41** -.54**
[-.19, .04] [.31, .50] [-.62, -.46]
.15** -.23** .42** -.66**
[.04, .26] [-.34, -.12] [.32, .51] [-.72, -.59]
-.10 .40** -.55** .99** -.77**
[-.21, .02] [.29, .49] [-.62, -.46] [.98, .99] [-.81, -.72]
-.11 .34** -.54** .85** -.72** .88**
[-.22, .00] [.23, .44] [-.62, -.46] [.82, .88] [-.77, -.66] [.85, .90]
Note. M and SD are used to represent mean and standard deviation, respectively.
Values in square brackets indicate the 95% confidence interval.
The confidence interval is a plausible range of population correlations
that could have caused the sample correlation (Cumming, 2014).
* indicates p < .05. ** indicates p < .01.
res <- cor(PCOS_Data_Scored_No_NA[,c(2,3,18,20,82,84,85,86,88,92)])
round(res, 2) Height Weight Hirsutism_score Age Emo_Score
Height 1.00 -0.27 0.10 0.22 0.17
Weight -0.27 1.00 0.16 0.19 -0.24
Hirsutism_score 0.10 0.16 1.00 0.12 0.17
Age 0.22 0.19 0.12 1.00 -0.20
Emo_Score 0.17 -0.24 0.17 -0.20 1.00
Respect_Score 0.35 -0.48 0.33 0.41 -0.01
Quality_Communication -0.11 0.15 0.13 0.05 0.37
Negative_Communication 0.17 -0.04 -0.30 0.16 -0.35
Overall_Communication -0.15 0.16 0.21 0.01 0.46
Trust_Score 0.01 0.01 0.10 -0.14 0.50
Respect_Score Quality_Communication
Height 0.35 -0.11
Weight -0.48 0.15
Hirsutism_score 0.33 0.13
Age 0.41 0.05
Emo_Score -0.01 0.37
Respect_Score 1.00 -0.12
Quality_Communication -0.12 1.00
Negative_Communication 0.09 0.03
Overall_Communication -0.14 0.96
Trust_Score 0.01 0.70
Negative_Communication Overall_Communication Trust_Score
Height 0.17 -0.15 0.01
Weight -0.04 0.16 0.01
Hirsutism_score -0.30 0.21 0.10
Age 0.16 0.01 -0.14
Emo_Score -0.35 0.46 0.50
Respect_Score 0.09 -0.14 0.01
Quality_Communication 0.03 0.96 0.70
Negative_Communication 1.00 -0.24 -0.19
Overall_Communication -0.24 1.00 0.74
Trust_Score -0.19 0.74 1.00
corrplot(res,
type = "lower",
order = "hclust",
tl.col = "black",
tl.srt = 45,
addCoef.col = "black",
sig.level = 0.05,
insig = "blank")png("correlation_plot.png", width = 2000, height = 2000, res = 300)
corrplot(res,
type = "lower",
order = "hclust",
tl.col = "black",
tl.srt = 45,
addCoef.col = "black",
number.cex = 0.6,
insig = "blank")
dev.off()png
2
describe(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)])%>%
knitr::kable(digits = 3, format="html", booktabs=TRUE, caption="Table 1. Descriptives")%>%
kable_classic(full_width = F, html_font = "Cambria")| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Height | 1 | 303 | 161.526 | 10.099 | 162.600 | 162.291 | 7.413 | 120.000 | 190 | 70.000 | -1.185 | 3.731 | 0.580 |
| Weight | 2 | 303 | 69.154 | 15.456 | 67.000 | 67.854 | 11.861 | 6.000 | 143 | 137.000 | 1.121 | 3.991 | 0.888 |
| Hirsutism_score | 3 | 304 | 5.803 | 6.161 | 3.000 | 5.025 | 4.448 | 0.000 | 26 | 26.000 | 0.831 | -0.376 | 0.353 |
| Age | 4 | 304 | 29.069 | 7.395 | 28.000 | 28.549 | 7.413 | 18.000 | 45 | 27.000 | 0.516 | -0.794 | 0.424 |
| Emo_Score | 5 | 294 | 1.970 | 0.499 | 2.000 | 1.963 | 0.494 | 1.000 | 3 | 2.000 | 0.162 | -0.400 | 0.029 |
| Respect_Score | 6 | 304 | 1.625 | 0.532 | 1.500 | 1.580 | 0.741 | 1.000 | 3 | 2.000 | 0.361 | -0.679 | 0.031 |
| Quality_Communication | 7 | 304 | 47.227 | 11.821 | 47.000 | 47.623 | 11.861 | 17.000 | 68 | 51.000 | -0.290 | -0.462 | 0.678 |
| Negative_Communication | 8 | 304 | 7.263 | 3.057 | 6.500 | 6.824 | 2.224 | 4.000 | 16 | 12.000 | 1.131 | 0.782 | 0.175 |
| Overall_Communication | 9 | 304 | 59.964 | 14.022 | 61.000 | 60.770 | 13.343 | 21.000 | 84 | 63.000 | -0.483 | -0.189 | 0.804 |
| Trust_Score | 10 | 304 | 2.611 | 0.841 | 2.714 | 2.665 | 0.847 | 0.214 | 4 | 3.786 | -0.569 | 0.081 | 0.048 |
random <- rnorm(nrow(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)]), 7)
#The command above generates a random variable with the same number of rows (values)as the dataset
hist(random)#just to check the distribtuion of this new variablefakereg <-lm(random ~., data = PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)])
##runs a regression with the new random variable as the dv and all the variables in the dataset as IVs
##This generates a set of residuals in order to check the assumptions
##The following set of code just scales the residuals
standardized <- rstudent(fakereg)
fitted <- scale(fakereg$fitted.values)
hist(fitted)check_model (fakereg) MVN::mvn(
PCOS_Data_Scored_No_NA[, c(2,3,18,20,82,84,85,86,88,92)]
)$multivariate_normality
Test Statistic p.value Method MVN
1 Henze-Zirkler 1.006 0.002 asymptotic ✗ Not normal
$univariate_normality
Test Variable Statistic p.value Normality
1 Anderson-Darling Height 0.297 0.56 ✓ Normal
2 Anderson-Darling Weight 1.521 <0.001 ✗ Not normal
3 Anderson-Darling Hirsutism_score 1.014 0.009 ✗ Not normal
4 Anderson-Darling Age 1.163 0.004 ✗ Not normal
5 Anderson-Darling Emo_Score 0.779 0.036 ✗ Not normal
6 Anderson-Darling Respect_Score 1.058 0.007 ✗ Not normal
7 Anderson-Darling Quality_Communication 0.408 0.318 ✓ Normal
8 Anderson-Darling Negative_Communication 0.982 0.011 ✗ Not normal
9 Anderson-Darling Overall_Communication 0.281 0.605 ✓ Normal
10 Anderson-Darling Trust_Score 0.565 0.126 ✓ Normal
$descriptives
Variable n Mean Std.Dev Median Min Max 25th
1 Height 22 165.864 5.676 165.000 157.000 178.000 162.250
2 Weight 22 74.286 12.689 71.900 62.000 113.600 66.250
3 Hirsutism_score 22 7.636 5.551 9.000 0.000 15.000 2.250
4 Age 22 34.682 8.191 39.000 19.000 45.000 29.250
5 Emo_Score 22 2.197 0.456 2.000 1.333 3.000 2.000
6 Respect_Score 22 1.705 0.549 1.750 1.000 2.500 1.125
7 Quality_Communication 22 52.727 7.598 52.000 40.000 68.000 45.500
8 Negative_Communication 22 6.591 2.153 6.000 4.000 12.000 5.000
9 Overall_Communication 22 66.136 7.827 64.500 53.000 84.000 59.500
10 Trust_Score 22 2.981 0.555 2.857 2.000 3.786 2.589
75th Skew Kurtosis
1 170.000 0.334 2.465
2 77.750 1.714 5.562
3 12.750 -0.266 1.489
4 41.000 -0.697 2.093
5 2.333 0.319 2.481
6 2.000 0.016 1.725
7 57.000 0.256 2.366
8 7.750 1.045 3.356
9 72.000 0.311 2.478
10 3.554 0.047 1.795
$data
Height Weight Hirsutism_score Age Emo_Score Respect_Score
26 163 63.0 3 19 2.666667 1.0
93 170 67.0 0 21 3.000000 1.0
106 172 68.0 0 37 1.666667 2.0
110 168 63.0 6 39 2.000000 1.5
116 163 78.0 0 39 2.333333 1.5
118 165 78.0 13 42 2.000000 1.5
119 164 68.0 7 41 1.333333 2.5
129 162 78.0 11 32 2.000000 2.0
137 178 66.0 9 41 2.333333 2.5
138 160 64.0 12 39 2.333333 2.5
139 170 72.0 11 42 2.666667 2.5
140 167 77.0 13 30 3.000000 2.0
148 170 62.0 12 40 3.000000 2.0
153 160 72.0 14 24 2.000000 2.0
155 169 77.0 15 42 2.333333 1.5
169 176 67.0 14 27 2.000000 2.0
221 160 72.7 2 20 1.666667 1.0
223 165 71.8 0 45 2.000000 2.0
224 170 113.6 9 42 1.666667 1.0
229 157 95.9 13 29 2.333333 1.0
246 163 65.0 0 32 2.000000 1.5
256 157 95.3 4 40 2.000000 1.0
Quality_Communication Negative_Communication Overall_Communication
26 61 6 75
93 44 6 58
106 51 5 66
110 47 6 61
116 51 6 65
118 51 7 64
119 40 7 53
129 44 5 59
137 53 9 64
138 56 5 71
139 57 5 72
140 59 5 74
148 56 4 72
153 51 8 63
155 58 5 73
169 44 9 55
221 44 5 59
223 56 12 64
224 57 8 69
229 68 4 84
246 67 11 76
256 45 7 58
Trust_Score
26 3.785714
93 2.571429
106 2.857143
110 2.642857
116 2.928571
118 2.857143
119 2.142857
129 2.714286
137 3.142857
138 3.500000
139 3.571429
140 3.785714
148 3.642857
153 2.428571
155 2.000000
169 2.714286
221 2.714286
223 2.571429
224 3.357143
229 3.714286
246 3.571429
256 2.357143
$subset
NULL
$outlierMethod
[1] "none"
attr(,"class")
[1] "mvn"
vis_miss(PCOS_Data_Scored)gg_miss_var(PCOS_Data_Scored)md.pattern(PCOS_Data_Scored[,c(2,3,18,20,82,84,85,86,88,92)]) Hirsutism_score Age Respect_Score Quality_Communication
292 1 1 1 1
10 1 1 1 1
1 1 1 1 1
1 1 1 1 1
0 0 0 0
Negative_Communication Overall_Communication Trust_Score Height Weight
292 1 1 1 1 1
10 1 1 1 1 1
1 1 1 1 1 0
1 1 1 1 0 1
0 0 0 1 1
Emo_Score
292 1 0
10 0 1
1 1 1
1 1 1
10 12
Analyses
### Age
str(PCOS_Data$PCOS_Status) Factor w/ 2 levels "Non-PCOS","PCOS": 2 2 2 2 2 2 2 2 2 2 ...
Age_model <- aov(Age ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Age ~ PCOS_Status, data = PCOS_Data_Scored)
Welch Two Sample t-test
data: Age by PCOS_Status
t = 0.9049, df = 302, p-value = 0.3662
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
-0.9013285 2.4359464
sample estimates:
mean in group Non-PCOS mean in group PCOS
29.44516 28.67785
summary(Age_model) Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status 1 45 44.73 0.817 0.367
Residuals 302 16525 54.72
cohens_d(
Age ~ PCOS_Status,
data = PCOS_Data_Scored
)Cohen's d | 95% CI
-------------------------
0.10 | [-0.12, 0.33]
- Estimated using pooled SD.
PCOS_Data_Scored |>
group_by(PCOS_Status) |>
summarise(
n = n(),
mean_age = mean(Age, na.rm = TRUE),
sd_age = sd(Age, na.rm = TRUE)
)# A tibble: 2 × 4
PCOS_Status n mean_age sd_age
<fct> <int> <dbl> <dbl>
1 Non-PCOS 155 29.4 7.55
2 PCOS 149 28.7 7.23
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Age)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = 0.4) +
labs(
title = "Age by PCOS Status",
x = "PCOS Status",
y = "Age"
)### Weight
Weight_model <- aov(Weight ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Weight ~ PCOS_Status, data = PCOS_Data_Scored)
Welch Two Sample t-test
data: Weight by PCOS_Status
t = -5.2563, df = 242.71, p-value = 3.22e-07
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
-12.422722 -5.650041
sample estimates:
mean in group Non-PCOS mean in group PCOS
64.74065 73.77703
summary(Weight_model) Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status 1 6182 6182 28.21 2.12e-07 ***
Residuals 301 65965 219
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
1 observation deleted due to missingness
cohens_d(
Weight ~ PCOS_Status,
data = PCOS_Data_Scored
)Warning: Missing values detected. NAs dropped.
Cohen's d | 95% CI
--------------------------
-0.61 | [-0.84, -0.38]
- Estimated using pooled SD.
PCOS_Data_Scored |>
group_by(PCOS_Status) |>
summarise(
n = n(),
mean_Weight = mean(Weight, na.rm = TRUE),
sd_Weight = sd(Weight, na.rm = TRUE)
)# A tibble: 2 × 4
PCOS_Status n mean_Weight sd_Weight
<fct> <int> <dbl> <dbl>
1 Non-PCOS 155 64.7 11.1
2 PCOS 149 73.8 17.9
plot_data <- PCOS_Data_Scored |>
filter(!is.na(Weight), is.finite(Weight), !is.na(PCOS_Status))
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Weight)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = 0.4) +
labs(
title = "Weight by PCOS Status",
x = "PCOS Status",
y = "Weight"
)Warning: Removed 1 row containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).
#### PPEQ Info Score
Info_Score_model <- aov(Info_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Info_Score ~ PCOS_Status, data = PCOS_Data_Scored)
Welch Two Sample t-test
data: Info_Score by PCOS_Status
t = -0.030664, df = 294.03, p-value = 0.9756
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
-0.1169905 0.1134007
sample estimates:
mean in group Non-PCOS mean in group PCOS
2.022478 2.024273
cohens_d(
Info_Score ~ PCOS_Status,
data = PCOS_Data_Scored
)Warning: Missing values detected. NAs dropped.
Cohen's d | 95% CI
-------------------------
-3.53e-03 | [-0.23, 0.22]
- Estimated using pooled SD.
summary(Info_Score_model) Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status 1 0.00 0.00024 0.001 0.976
Residuals 299 77.31 0.25856
3 observations deleted due to missingness
PCOS_Data_Scored |>
group_by(PCOS_Status) |>
summarise(
n = n(),
mean_Info_Score = mean(Info_Score, na.rm = TRUE),
sd_Info_Score = sd(Info_Score, na.rm = TRUE)
)# A tibble: 2 × 4
PCOS_Status n mean_Info_Score sd_Info_Score
<fct> <int> <dbl> <dbl>
1 Non-PCOS 155 2.02 0.545
2 PCOS 149 2.02 0.469
plot_data <- PCOS_Data_Scored |>
filter(!is.na(Info_Score), is.finite(Info_Score), !is.na(Info_Score))
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Info_Score)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = 0.4) +
labs(
title = "Info_Score by PCOS Status",
x = "PCOS Status",
y = "Info_Score"
)Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_point()`).
#### PPEQ Emotion Score
Emo_Score_model <- aov(Emo_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Emo_Score ~ PCOS_Status, data = PCOS_Data_Scored)
Welch Two Sample t-test
data: Emo_Score by PCOS_Status
t = 2.2102, df = 288.11, p-value = 0.02788
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
0.0139822 0.2414479
sample estimates:
mean in group Non-PCOS mean in group PCOS
2.034247 1.906532
cohens_d(
Emo_Score ~ PCOS_Status,
data = PCOS_Data_Scored
)Warning: Missing values detected. NAs dropped.
Cohen's d | 95% CI
------------------------
0.26 | [0.03, 0.49]
- Estimated using pooled SD.
summary(Emo_Score_model) Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status 1 1.20 1.1988 4.876 0.028 *
Residuals 292 71.79 0.2458
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
10 observations deleted due to missingness
PCOS_Data_Scored |>
group_by(PCOS_Status) |>
summarise(
n = n(),
mean_Emo_Score = mean(Emo_Score, na.rm = TRUE),
sd_Emo_Score = sd(Emo_Score, na.rm = TRUE)
)# A tibble: 2 × 4
PCOS_Status n mean_Emo_Score sd_Emo_Score
<fct> <int> <dbl> <dbl>
1 Non-PCOS 155 2.03 0.462
2 PCOS 149 1.91 0.527
plot_data <- PCOS_Data_Scored |>
filter(!is.na(Emo_Score), is.finite(Emo_Score), !is.na(Emo_Score))
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Emo_Score)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = 0.4) +
coord_cartesian(ylim = c(0, 4)) +
labs(
title = "Emotion_Score by PCOS Status",
x = "PCOS Status",
y = "Emo_Score"
)Warning: Removed 10 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 10 rows containing missing values or values outside the scale range
(`geom_point()`).
#### Respect Score
Respect_Score_model <- aov(Respect_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Respect_Score ~ PCOS_Status, data = PCOS_Data_Scored)
Welch Two Sample t-test
data: Respect_Score by PCOS_Status
t = 1.0008, df = 296.98, p-value = 0.3177
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
-0.05883484 0.18059280
sample estimates:
mean in group Non-PCOS mean in group PCOS
1.654839 1.593960
cohens_d(
Respect_Score ~ PCOS_Status,
data = PCOS_Data_Scored
)Cohen's d | 95% CI
-------------------------
0.11 | [-0.11, 0.34]
- Estimated using pooled SD.
summary(Respect_Score_model) Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status 1 0.28 0.2816 0.995 0.319
Residuals 302 85.47 0.2830
PCOS_Data_Scored |>
group_by(PCOS_Status) |>
summarise(
n = n(),
mean_Respect_Score = mean(Respect_Score, na.rm = TRUE),
sd_Respect_Score = sd(Respect_Score, na.rm = TRUE)
)# A tibble: 2 × 4
PCOS_Status n mean_Respect_Score sd_Respect_Score
<fct> <int> <dbl> <dbl>
1 Non-PCOS 155 1.65 0.574
2 PCOS 149 1.59 0.484
plot_data <- PCOS_Data_Scored |>
filter(!is.na(Respect_Score), is.finite(Respect_Score), !is.na(Respect_Score))
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Respect_Score)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = 0.4) +
coord_cartesian(ylim = c(0, 4)) +
labs(
title = "Respect_Score by PCOS Status",
x = "PCOS Status",
y = "Respect_Score"
)#### Quality Communication
Quality_Communication_Score_model <- aov(Quality_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Quality_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
Welch Two Sample t-test
data: Quality_Communication by PCOS_Status
t = -0.07943, df = 300.81, p-value = 0.9367
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
-2.775589 2.560218
sample estimates:
mean in group Non-PCOS mean in group PCOS
47.17419 47.28188
cohens_d(
Quality_Communication ~ PCOS_Status,
data = PCOS_Data_Scored
)Cohen's d | 95% CI
-------------------------
-9.09e-03 | [-0.23, 0.22]
- Estimated using pooled SD.
summary(Quality_Communication_Score_model) Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status 1 1 0.88 0.006 0.937
Residuals 302 42340 140.20
PCOS_Data_Scored |>
group_by(PCOS_Status) |>
summarise(
n = n(),
mean_Quality_Communication = mean(Quality_Communication, na.rm = TRUE),
sd_Quality_Communication = sd(Quality_Communication, na.rm = TRUE)
)# A tibble: 2 × 4
PCOS_Status n mean_Quality_Communication sd_Quality_Communication
<fct> <int> <dbl> <dbl>
1 Non-PCOS 155 47.2 12.4
2 PCOS 149 47.3 11.2
plot_data <- PCOS_Data_Scored |>
filter(!is.na(Quality_Communication), is.finite(Quality_Communication), !is.na(Quality_Communication))
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Quality_Communication)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = 0.4) +
coord_cartesian(ylim = c(15, 70)) +
labs(
title = "Quality_Communication_Score by PCOS Status",
x = "PCOS Status",
y = "Quality_Communication_Score"
)##### Negative Communication
Negative_Communication_Score_model <- aov(Negative_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Negative_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
Welch Two Sample t-test
data: Negative_Communication by PCOS_Status
t = -0.70705, df = 296.89, p-value = 0.4801
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
-0.9357275 0.4410750
sample estimates:
mean in group Non-PCOS mean in group PCOS
7.141935 7.389262
cohens_d(
Negative_Communication ~ PCOS_Status,
data = PCOS_Data_Scored
)Cohen's d | 95% CI
-------------------------
-0.08 | [-0.31, 0.14]
- Estimated using pooled SD.
summary(Negative_Communication_Score_model) Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status 1 4.6 4.647 0.497 0.482
Residuals 302 2826.3 9.359
PCOS_Data_Scored |>
group_by(PCOS_Status) |>
summarise(
n = n(),
mean_Negative_Communication = mean(Negative_Communication, na.rm = TRUE),
sd_Negative_Communication = sd(Negative_Communication, na.rm = TRUE)
)# A tibble: 2 × 4
PCOS_Status n mean_Negative_Communication sd_Negative_Communication
<fct> <int> <dbl> <dbl>
1 Non-PCOS 155 7.14 3.30
2 PCOS 149 7.39 2.78
plot_data <- PCOS_Data_Scored |>
filter(!is.na(Negative_Communication), is.finite (Negative_Communication), !is.na(Negative_Communication))
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Negative_Communication)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = 0.4) +
coord_cartesian(ylim = c(3, 18)) +
labs(
title = "Negative_Communication_Score by PCOS Status",
x = "PCOS Status",
y = "Negative_Communication_Score"
)#### Overall Communicataion
Overall_Communication_Score_model <- aov(Overall_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Overall_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
Welch Two Sample t-test
data: Overall_Communication by PCOS_Status
t = 0.086906, df = 298.77, p-value = 0.9308
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
-3.022427 3.301708
sample estimates:
mean in group Non-PCOS mean in group PCOS
60.03226 59.89262
cohens_d(
Overall_Communication ~ PCOS_Status,
data = PCOS_Data_Scored
)Cohen's d | 95% CI
-------------------------
9.94e-03 | [-0.21, 0.23]
- Estimated using pooled SD.
summary(Overall_Communication_Score_model) Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status 1 1 1.48 0.008 0.931
Residuals 302 59571 197.26
PCOS_Data_Scored |>
group_by(PCOS_Status) |>
summarise(
n = n(),
mean_Overall_Communication = mean(Overall_Communication, na.rm = TRUE),
sd_Overall_Communication = sd(Overall_Communication, na.rm = TRUE)
)# A tibble: 2 × 4
PCOS_Status n mean_Overall_Communication sd_Overall_Communication
<fct> <int> <dbl> <dbl>
1 Non-PCOS 155 60.0 15.0
2 PCOS 149 59.9 13.0
plot_data <- PCOS_Data_Scored |>
filter(!is.na(Overall_Communication), is.finite (Overall_Communication), !is.na(Overall_Communication))
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Overall_Communication)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = 0.4) +
coord_cartesian(ylim = c(19, 85)) +
labs(
title = "Overall_Communication by PCOS Status",
x = "PCOS Status",
y = "Overall_Communication_Score"
)#### Trust Score
Trust_Score_model <- aov(Trust_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Trust_Score ~ PCOS_Status, data = PCOS_Data_Scored)
Welch Two Sample t-test
data: Trust_Score by PCOS_Status
t = -0.57959, df = 292.87, p-value = 0.5626
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
-0.2451808 0.1336249
sample estimates:
mean in group Non-PCOS mean in group PCOS
2.583871 2.639649
cohens_d(
Trust_Score ~ PCOS_Status,
data = PCOS_Data_Scored
)Cohen's d | 95% CI
-------------------------
-0.07 | [-0.29, 0.16]
- Estimated using pooled SD.
summary(Trust_Score_model) Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status 1 0.24 0.2364 0.333 0.564
Residuals 302 214.31 0.7096
PCOS_Data_Scored |>
group_by(PCOS_Status) |>
summarise(
n = n(),
mean_Trust_Score = mean(Trust_Score, na.rm = TRUE),
sd_Trust_Score = sd(Trust_Score, na.rm = TRUE)
)# A tibble: 2 × 4
PCOS_Status n mean_Trust_Score sd_Trust_Score
<fct> <int> <dbl> <dbl>
1 Non-PCOS 155 2.58 0.927
2 PCOS 149 2.64 0.745
plot_data <- PCOS_Data_Scored |>
filter(!is.na(Trust_Score), is.finite (Trust_Score), !is.na(Trust_Score))
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Trust_Score)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = 0.4) +
labs(
title = "Trust_Score by PCOS Status",
x = "PCOS Status",
y = "Trust_Score"
)#### Hirsutism
Hirsutism_score_model <- aov(Hirsutism_score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Hirsutism_score ~ PCOS_Status, data = PCOS_Data_Scored)
Welch Two Sample t-test
data: Hirsutism_score by PCOS_Status
t = -6.7147, df = 276.98, p-value = 1.065e-10
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
-5.760408 -3.148533
sample estimates:
mean in group Non-PCOS mean in group PCOS
3.619355 8.073826
cohens_d(
Hirsutism_score ~ PCOS_Status,
data = PCOS_Data_Scored
)Cohen's d | 95% CI
--------------------------
-0.77 | [-1.01, -0.54]
- Estimated using pooled SD.
summary(Hirsutism_score_model) Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status 1 1507 1507.4 45.56 7.58e-11 ***
Residuals 302 9993 33.1
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PCOS_Data_Scored |>
group_by(PCOS_Status) |>
summarise(
n = n(),
mean_Hirsutism_score = mean(Hirsutism_score, na.rm = TRUE),
sd_Hirsutism_score = sd(Hirsutism_score, na.rm = TRUE)
)# A tibble: 2 × 4
PCOS_Status n mean_Hirsutism_score sd_Hirsutism_score
<fct> <int> <dbl> <dbl>
1 Non-PCOS 155 3.62 4.95
2 PCOS 149 8.07 6.48
plot_data <- PCOS_Data_Scored |>
filter(!is.na(Hirsutism_score), is.finite (Hirsutism_score), !is.na(Hirsutism_score))
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Hirsutism_score)) +
geom_boxplot() +
geom_jitter(width = 0.1, alpha = 0.4) +
labs(
title = "Hirsutism score by PCOS Status",
x = "PCOS Status",
y = "Hirsutism score"
)\(\chi^2 Analyses\)
vars <- c("Education", "Smoking", "Alcohol", "Exercise_per_day", "Exercise_per_week","Type_of_exercise","Conditions","Hysterectomy","Contraceptive","Alopecia_stage","Acne","Insulin_resistance_medications","Menopause","Stopped_period","Race")
fisher_results <- lapply(vars, function(v) {
tab <- table(PCOS_Data_Scored[[v]], PCOS_Data_Scored$PCOS_Status)
# column percentages (within PCOS group)
prop_tab <- prop.table(tab, margin = 2) * 100
test <- fisher.test(tab)
data.frame(
Variable = v,
p_value = test$p.value,
levels = nrow(tab),
pct_table = paste(round(prop_tab, 1), collapse = " | ")
)
})
fisher_table <- do.call(rbind, fisher_results)
fisher_table Variable p_value levels
1 Education 3.245780e-01 5
2 Smoking 2.785361e-02 2
3 Alcohol 5.727535e-02 2
4 Exercise_per_day 6.586983e-02 5
5 Exercise_per_week 4.685501e-01 6
6 Type_of_exercise 5.127177e-01 3
7 Conditions 7.371856e-01 11
8 Hysterectomy 2.146467e-01 2
9 Contraceptive 3.119180e-13 2
10 Alopecia_stage 7.041877e-12 4
11 Acne 6.634508e-07 2
12 Insulin_resistance_medications 6.218387e-14 2
13 Menopause 2.832929e-01 2
14 Stopped_period 1.000000e+00 2
15 Race 4.847089e-01 3
pct_table
1 0.6 | 0.6 | 29 | 65.2 | 4.5 | 0 | 0 | 37.6 | 57.7 | 4.7
2 21.3 | 78.7 | 32.9 | 67.1
3 86.3 | 13.7 | 93.3 | 6.7
4 18.1 | 19.4 | 32.9 | 8.4 | 21.3 | 10.1 | 16.8 | 47 | 10.1 | 16.1
5 10.3 | 26.5 | 19.4 | 7.1 | 5.8 | 31 | 6.7 | 28.2 | 25.5 | 10.1 | 5.4 | 24.2
6 49.5 | 41.1 | 9.3 | 46 | 47.8 | 6.2
7 21.4 | 42.9 | 7.1 | 0 | 7.1 | 0 | 7.1 | 7.1 | 0 | 0 | 7.1 | 10.7 | 39.3 | 7.1 | 0 | 14.3 | 3.6 | 17.9 | 0 | 3.6 | 3.6 | 0
8 3.2 | 96.8 | 0.7 | 99.3
9 8.4 | 91.6 | 44.3 | 55.7
10 91 | 7.7 | 1.3 | 0 | 56.4 | 33.6 | 10.1 | 0
11 40.6 | 59.4 | 69.1 | 30.9
12 2.6 | 97.4 | 34.2 | 65.8
13 3.9 | 96.1 | 1.3 | 98.7
14 3.9 | 96.1 | 4 | 96
15 16.8 | 69.7 | 13.5 | 21.5 | 67.8 | 10.7
desc_table <- lapply(vars, function(v) {
PCOS_Data_Scored |>
count(Level = .data[[v]]) |>
mutate(
Variable = v,
pct = round(n / sum(n) * 100, 1),
`n (%)` = paste0(n, " (", pct, "%)")
) |>
select(Variable, Level, `n (%)`)
}) |>
bind_rows()
desc_table Variable Level n (%)
1 Education No Education 1 (0.3%)
2 Education Primary 1 (0.3%)
3 Education Secondary 101 (33.2%)
4 Education Tertiary 187 (61.5%)
5 Education Post-Vocational 14 (4.6%)
6 Smoking Yes 82 (27%)
7 Smoking No 222 (73%)
8 Alcohol Yes 271 (89.1%)
9 Alcohol No 31 (10.2%)
10 Alcohol <NA> 2 (0.7%)
11 Exercise_per_day < 10 minutes 43 (14.1%)
12 Exercise_per_day 10-19 minutes 55 (18.1%)
13 Exercise_per_day 10-39 minutes 121 (39.8%)
14 Exercise_per_day 40-59 minutes 28 (9.2%)
15 Exercise_per_day > 60 minutes 57 (18.8%)
16 Exercise_per_week 1 day 26 (8.6%)
17 Exercise_per_week 2 days 83 (27.3%)
18 Exercise_per_week 3 days 68 (22.4%)
19 Exercise_per_week 4 days 26 (8.6%)
20 Exercise_per_week >5 days 17 (5.6%)
21 Exercise_per_week none 84 (27.6%)
22 Type_of_exercise Mild 105 (34.5%)
23 Type_of_exercise Moderate 98 (32.2%)
24 Type_of_exercise Strenuous 17 (5.6%)
25 Type_of_exercise <NA> 84 (27.6%)
26 Conditions T2D 6 (2%)
27 Conditions H2P 17 (5.6%)
28 Conditions Heart Cond 3 (1%)
29 Conditions Constipation 5 (1.6%)
30 Conditions Cancer 1 (0.3%)
31 Conditions T2D & H2P 6 (2%)
32 Conditions T2D,H2P,& Cancer 1 (0.3%)
33 Conditions T2D,H2P,Heart, & Cancer 1 (0.3%)
34 Conditions T2D & Constipation 1 (0.3%)
35 Conditions T2D & Heart 1 (0.3%)
36 Conditions <NA> 262 (86.2%)
37 Hysterectomy Yes 6 (2%)
38 Hysterectomy No 298 (98%)
39 Contraceptive Yes 79 (26%)
40 Contraceptive No 225 (74%)
41 Alopecia_stage no alopecia 225 (74%)
42 Alopecia_stage Stage 1 62 (20.4%)
43 Alopecia_stage Stage 2 17 (5.6%)
44 Acne Yes 166 (54.6%)
45 Acne No 138 (45.4%)
46 Insulin_resistance_medications Yes 55 (18.1%)
47 Insulin_resistance_medications No 249 (81.9%)
48 Menopause Yes 8 (2.6%)
49 Menopause No 296 (97.4%)
50 Stopped_period Yes 12 (3.9%)
51 Stopped_period No 292 (96.1%)
52 Race African 58 (19.1%)
53 Race East Indian 209 (68.8%)
54 Race Mixed 37 (12.2%)
table(PCOS_Data_Scored$PCOS_Status)
Non-PCOS PCOS
155 149
library(dplyr)
vars <- c(
"Education",
"Smoking",
"Alcohol",
"Exercise_per_day",
"Exercise_per_week",
"Type_of_exercise",
"Conditions",
"Hysterectomy",
"Contraceptive",
"Alopecia_stage",
"Acne",
"Insulin_resistance_medications",
"Menopause",
"Stopped_period",
"Race"
)
fisher_results <- lapply(vars, function(v) {
tab <- table(
PCOS_Data_Scored[[v]],
PCOS_Data_Scored$PCOS_Status
)
prop_tab <- round(prop.table(tab, margin = 2) * 100, 1)
test <- fisher.test(tab)
OR <- NA
LCL <- NA
UCL <- NA
OR_CI <- NA
if (all(dim(tab) == c(2, 2))) {
OR <- unname(test$estimate)
LCL <- test$conf.int[1]
UCL <- test$conf.int[2]
OR_CI <- sprintf("%.2f (%.2f, %.2f)", OR, LCL, UCL)
}
data.frame(
Variable = v,
Levels = nrow(tab),
p_value = round(test$p.value, 4),
Odds_Ratio = round(OR, 2),
CI_Lower = round(LCL, 2),
CI_Upper = round(UCL, 2),
`OR (95% CI)` = OR_CI,
pct_table = paste(prop_tab, collapse = " | "),
stringsAsFactors = FALSE
)
})
fisher_table <- bind_rows(fisher_results)
fisher_table Variable Levels p_value Odds_Ratio CI_Lower CI_Upper
1 Education 5 0.3246 NA NA NA
2 Smoking 2 0.0279 0.55 0.32 0.95
3 Alcohol 2 0.0573 0.45 0.18 1.05
4 Exercise_per_day 5 0.0659 NA NA NA
5 Exercise_per_week 6 0.4686 NA NA NA
6 Type_of_exercise 3 0.5127 NA NA NA
7 Conditions 11 0.7372 NA NA NA
8 Hysterectomy 2 0.2146 4.91 0.54 234.62
9 Contraceptive 2 0.0000 0.12 0.06 0.23
10 Alopecia_stage 4 0.0000 NA NA NA
11 Acne 2 0.0000 0.31 0.19 0.50
12 Insulin_resistance_medications 2 0.0000 0.05 0.01 0.15
13 Menopause 2 0.2833 2.95 0.52 30.35
14 Stopped_period 2 1.0000 0.96 0.25 3.68
15 Race 3 0.4847 NA NA NA
OR..95..CI.
1 <NA>
2 0.55 (0.32, 0.95)
3 0.45 (0.18, 1.05)
4 <NA>
5 <NA>
6 <NA>
7 <NA>
8 4.91 (0.54, 234.62)
9 0.12 (0.06, 0.23)
10 <NA>
11 0.31 (0.19, 0.50)
12 0.05 (0.01, 0.15)
13 2.95 (0.52, 30.35)
14 0.96 (0.25, 3.68)
15 <NA>
pct_table
1 0.6 | 0.6 | 29 | 65.2 | 4.5 | 0 | 0 | 37.6 | 57.7 | 4.7
2 21.3 | 78.7 | 32.9 | 67.1
3 86.3 | 13.7 | 93.3 | 6.7
4 18.1 | 19.4 | 32.9 | 8.4 | 21.3 | 10.1 | 16.8 | 47 | 10.1 | 16.1
5 10.3 | 26.5 | 19.4 | 7.1 | 5.8 | 31 | 6.7 | 28.2 | 25.5 | 10.1 | 5.4 | 24.2
6 49.5 | 41.1 | 9.3 | 46 | 47.8 | 6.2
7 21.4 | 42.9 | 7.1 | 0 | 7.1 | 0 | 7.1 | 7.1 | 0 | 0 | 7.1 | 10.7 | 39.3 | 7.1 | 0 | 14.3 | 3.6 | 17.9 | 0 | 3.6 | 3.6 | 0
8 3.2 | 96.8 | 0.7 | 99.3
9 8.4 | 91.6 | 44.3 | 55.7
10 91 | 7.7 | 1.3 | 0 | 56.4 | 33.6 | 10.1 | 0
11 40.6 | 59.4 | 69.1 | 30.9
12 2.6 | 97.4 | 34.2 | 65.8
13 3.9 | 96.1 | 1.3 | 98.7
14 3.9 | 96.1 | 4 | 96
15 16.8 | 69.7 | 13.5 | 21.5 | 67.8 | 10.7
desc_table <- lapply(vars, function(v) {
PCOS_Data_Scored |>
count(Level = .data[[v]]) |>
mutate(
Variable = v,
pct = round(100 * n / sum(n), 1),
`n (%)` = paste0(n, " (", pct, "%)")
) |>
select(Variable, Level, `n (%)`)
}) |>
bind_rows()
desc_table Variable Level n (%)
1 Education No Education 1 (0.3%)
2 Education Primary 1 (0.3%)
3 Education Secondary 101 (33.2%)
4 Education Tertiary 187 (61.5%)
5 Education Post-Vocational 14 (4.6%)
6 Smoking Yes 82 (27%)
7 Smoking No 222 (73%)
8 Alcohol Yes 271 (89.1%)
9 Alcohol No 31 (10.2%)
10 Alcohol <NA> 2 (0.7%)
11 Exercise_per_day < 10 minutes 43 (14.1%)
12 Exercise_per_day 10-19 minutes 55 (18.1%)
13 Exercise_per_day 10-39 minutes 121 (39.8%)
14 Exercise_per_day 40-59 minutes 28 (9.2%)
15 Exercise_per_day > 60 minutes 57 (18.8%)
16 Exercise_per_week 1 day 26 (8.6%)
17 Exercise_per_week 2 days 83 (27.3%)
18 Exercise_per_week 3 days 68 (22.4%)
19 Exercise_per_week 4 days 26 (8.6%)
20 Exercise_per_week >5 days 17 (5.6%)
21 Exercise_per_week none 84 (27.6%)
22 Type_of_exercise Mild 105 (34.5%)
23 Type_of_exercise Moderate 98 (32.2%)
24 Type_of_exercise Strenuous 17 (5.6%)
25 Type_of_exercise <NA> 84 (27.6%)
26 Conditions T2D 6 (2%)
27 Conditions H2P 17 (5.6%)
28 Conditions Heart Cond 3 (1%)
29 Conditions Constipation 5 (1.6%)
30 Conditions Cancer 1 (0.3%)
31 Conditions T2D & H2P 6 (2%)
32 Conditions T2D,H2P,& Cancer 1 (0.3%)
33 Conditions T2D,H2P,Heart, & Cancer 1 (0.3%)
34 Conditions T2D & Constipation 1 (0.3%)
35 Conditions T2D & Heart 1 (0.3%)
36 Conditions <NA> 262 (86.2%)
37 Hysterectomy Yes 6 (2%)
38 Hysterectomy No 298 (98%)
39 Contraceptive Yes 79 (26%)
40 Contraceptive No 225 (74%)
41 Alopecia_stage no alopecia 225 (74%)
42 Alopecia_stage Stage 1 62 (20.4%)
43 Alopecia_stage Stage 2 17 (5.6%)
44 Acne Yes 166 (54.6%)
45 Acne No 138 (45.4%)
46 Insulin_resistance_medications Yes 55 (18.1%)
47 Insulin_resistance_medications No 249 (81.9%)
48 Menopause Yes 8 (2.6%)
49 Menopause No 296 (97.4%)
50 Stopped_period Yes 12 (3.9%)
51 Stopped_period No 292 (96.1%)
52 Race African 58 (19.1%)
53 Race East Indian 209 (68.8%)
54 Race Mixed 37 (12.2%)
table(PCOS_Data_Scored$PCOS_Status)
Non-PCOS PCOS
155 149
table(PCOS_Data_Scored$PCOS_Status)
Non-PCOS PCOS
155 149
table(PCOS_Data_Scored$PCOS_Status,PCOS_Data_Scored$Acne)
Yes No
Non-PCOS 63 92
PCOS 103 46
table(PCOS_Data_Scored$Race,PCOS_Data_Scored$PCOS_Status)
Non-PCOS PCOS
African 26 32
East Indian 108 101
Mixed 21 16
library(gt)
vars <- c(
"Education", "Smoking", "Alcohol", "Exercise_per_day",
"Exercise_per_week", "Type_of_exercise", "Conditions",
"Hysterectomy", "Contraceptive", "Alopecia_stage",
"Acne", "Insulin_resistance_medications",
"Menopause", "Stopped_period", "Race"
)
group_var <- "PCOS_Status"
var_labels <- c(
Education = "Education Level",
Smoking = "Smoking Status",
Alcohol = "Alcohol Use",
Exercise_per_day = "Exercise (per day)",
Exercise_per_week = "Exercise (per week)",
Type_of_exercise = "Type of Exercise",
Conditions = "Comorbid Conditions",
Hysterectomy = "History of Hysterectomy",
Contraceptive = "Contraceptive Use",
Alopecia_stage = "Alopecia Stage",
Acne = "Acne",
Insulin_resistance_medications = "IR Medications",
Menopause = "Menopause Status",
Stopped_period = "Stopped Period",
Race = "Race"
)
table1 <- map_dfr(vars, function(v) {
tab <- table(PCOS_Data_Scored[[v]], PCOS_Data_Scored[[group_var]])
test <- if (any(tab < 5)) {
fisher.test(tab)
} else {
chisq.test(tab)
}
df <- as.data.frame(tab) |>
rename(Level = Var1, Group = Var2, n = Freq) |>
mutate(
Level = as.character(Level),
Group = as.character(Group)
) |>
group_by(Group) |>
mutate(
pct = round(n / sum(n) * 100, 1),
`n (%)` = paste0(n, " (", pct, "%)")
) |>
ungroup()
df_wide <- df |>
select(Level, Group, `n (%)`) |>
pivot_wider(names_from = Group, values_from = `n (%)`)
df_wide |>
mutate(
Variable = v,
p_value = test$p.value,
.before = 1
)
}) |>
group_by(Variable) |>
mutate(
p_value = ifelse(row_number() == 1, p_value, NA)
) |>
ungroup()
table1 <- table1 |>
mutate(
Variable = recode(Variable, !!!var_labels),
# indent levels for readability
Level = paste0(" ", Level),
# format p-values
p_value = case_when(
is.na(p_value) ~ "",
p_value < 0.001 ~ "<0.001",
TRUE ~ sprintf("%.3f", p_value)
)
)
gt_table <- table1 |>
gt() |>
cols_label(
Variable = "",
Level = "",
p_value = "p-value"
) |>
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
columns = Variable,
rows = !duplicated(Variable)
)
) |>
cols_align(
align = "center",
-c(Variable, Level)
)
gt_table| p-value | Non-PCOS | PCOS | ||
|---|---|---|---|---|
| Education Level | 0.325 | No Education | 1 (0.6%) | 0 (0%) |
| Education Level | Primary | 1 (0.6%) | 0 (0%) | |
| Education Level | Secondary | 45 (29%) | 56 (37.6%) | |
| Education Level | Tertiary | 101 (65.2%) | 86 (57.7%) | |
| Education Level | Post-Vocational | 7 (4.5%) | 7 (4.7%) | |
| Smoking Status | 0.032 | Yes | 33 (21.3%) | 49 (32.9%) |
| Smoking Status | No | 122 (78.7%) | 100 (67.1%) | |
| Alcohol Use | 0.069 | Yes | 132 (86.3%) | 139 (93.3%) |
| Alcohol Use | No | 21 (13.7%) | 10 (6.7%) | |
| Exercise (per day) | 0.066 | < 10 minutes | 28 (18.1%) | 15 (10.1%) |
| Exercise (per day) | 10-19 minutes | 30 (19.4%) | 25 (16.8%) | |
| Exercise (per day) | 10-39 minutes | 51 (32.9%) | 70 (47%) | |
| Exercise (per day) | 40-59 minutes | 13 (8.4%) | 15 (10.1%) | |
| Exercise (per day) | > 60 minutes | 33 (21.3%) | 24 (16.1%) | |
| Exercise (per week) | 0.465 | 1 day | 16 (10.3%) | 10 (6.7%) |
| Exercise (per week) | 2 days | 41 (26.5%) | 42 (28.2%) | |
| Exercise (per week) | 3 days | 30 (19.4%) | 38 (25.5%) | |
| Exercise (per week) | 4 days | 11 (7.1%) | 15 (10.1%) | |
| Exercise (per week) | >5 days | 9 (5.8%) | 8 (5.4%) | |
| Exercise (per week) | none | 48 (31%) | 36 (24.2%) | |
| Type of Exercise | 0.497 | Mild | 53 (49.5%) | 52 (46%) |
| Type of Exercise | Moderate | 44 (41.1%) | 54 (47.8%) | |
| Type of Exercise | Strenuous | 10 (9.3%) | 7 (6.2%) | |
| Comorbid Conditions | 0.737 | T2D | 3 (21.4%) | 3 (10.7%) |
| Comorbid Conditions | H2P | 6 (42.9%) | 11 (39.3%) | |
| Comorbid Conditions | Heart Cond | 1 (7.1%) | 2 (7.1%) | |
| Comorbid Conditions | PCOS | 0 (0%) | 0 (0%) | |
| Comorbid Conditions | Constipation | 1 (7.1%) | 4 (14.3%) | |
| Comorbid Conditions | Cancer | 0 (0%) | 1 (3.6%) | |
| Comorbid Conditions | T2D & H2P | 1 (7.1%) | 5 (17.9%) | |
| Comorbid Conditions | T2D,H2P,& Cancer | 1 (7.1%) | 0 (0%) | |
| Comorbid Conditions | T2D,H2P,Heart, & Cancer | 0 (0%) | 1 (3.6%) | |
| Comorbid Conditions | T2D & Constipation | 0 (0%) | 1 (3.6%) | |
| Comorbid Conditions | T2D & Heart | 1 (7.1%) | 0 (0%) | |
| History of Hysterectomy | 0.215 | Yes | 5 (3.2%) | 1 (0.7%) |
| History of Hysterectomy | No | 150 (96.8%) | 148 (99.3%) | |
| Contraceptive Use | <0.001 | Yes | 13 (8.4%) | 66 (44.3%) |
| Contraceptive Use | No | 142 (91.6%) | 83 (55.7%) | |
| Alopecia Stage | <0.001 | no alopecia | 141 (91%) | 84 (56.4%) |
| Alopecia Stage | Stage 1 | 12 (7.7%) | 50 (33.6%) | |
| Alopecia Stage | Stage 2 | 2 (1.3%) | 15 (10.1%) | |
| Alopecia Stage | Stage 3 | 0 (0%) | 0 (0%) | |
| Acne | <0.001 | Yes | 63 (40.6%) | 103 (69.1%) |
| Acne | No | 92 (59.4%) | 46 (30.9%) | |
| IR Medications | <0.001 | Yes | 4 (2.6%) | 51 (34.2%) |
| IR Medications | No | 151 (97.4%) | 98 (65.8%) | |
| Menopause Status | 0.283 | Yes | 6 (3.9%) | 2 (1.3%) |
| Menopause Status | No | 149 (96.1%) | 147 (98.7%) | |
| Stopped Period | 1.000 | Yes | 6 (3.9%) | 6 (4%) |
| Stopped Period | No | 149 (96.1%) | 143 (96%) | |
| Race | 0.493 | African | 26 (16.8%) | 32 (21.5%) |
| Race | East Indian | 108 (69.7%) | 101 (67.8%) | |
| Race | Mixed | 21 (13.5%) | 16 (10.7%) |
library(flextable)
Attaching package: 'flextable'
The following objects are masked from 'package:kableExtra':
as_image, footnote
The following object is masked from 'package:purrr':
compose
library(officer)
ft <- flextable(table1)
ft <- ft |>
autofit() |>
theme_booktabs() |>
align(align = "center", part = "all") |>
bold(i = ~ !duplicated(Variable), j = 1) # bold variable names
doc <- read_docx() |>
body_add_par("Table 1. Descriptive Characteristics by PCOS Status", style = "heading 1") |>
body_add_flextable(ft)
print(doc, target = "Table1.docx")cont_vars <- c(
"Age", "Weight", "Info_Score", "Emo_Score", "Respect_Score",
"Quality_Communication", "Negative_Communication",
"Overall_Communication", "Trust_Score", "Hirsutism_score"
)
group_var <- "PCOS_Status"
var_labels <- c(
Age = "Age",
Weight = "Weight",
Info_Score = "Information Score",
Emo_Score = "Emotion Score",
Respect_Score = "Respect Score",
Quality_Communication = "Quality Communication",
Negative_Communication = "Negative Communication",
Overall_Communication = "Overall Communication",
Trust_Score = "Trust Score",
Hirsutism_score = "Hirsutism Score"
)
table2 <- map_dfr(cont_vars, function(v) {
#
test <- t.test(PCOS_Data_Scored[[v]] ~ PCOS_Data_Scored[[group_var]])
# group summaries
sum_stats <- PCOS_Data_Scored |>
group_by(.data[[group_var]]) |>
summarise(
mean = mean(.data[[v]], na.rm = TRUE),
sd = sd(.data[[v]], na.rm = TRUE),
.groups = "drop"
) |>
mutate(
`Mean (SD)` = sprintf("%.2f (%.2f)", mean, sd)
)
wide <- sum_stats |>
select(all_of(group_var), `Mean (SD)`) |>
pivot_wider(names_from = all_of(group_var), values_from = `Mean (SD)`)
wide |>
mutate(
Variable = v,
p_value = test$p.value,
.before = 1
)
})
table2 <- table2 |>
mutate(
Variable = recode(Variable, !!!var_labels),
p_value = case_when(
p_value < 0.001 ~ "<0.001",
TRUE ~ sprintf("%.3f", p_value)
)
)
ft <- flextable(table2)
ft <- ft |>
autofit() |>
theme_booktabs() |>
align(align = "center", part = "all") |>
bold(j = 1) |>
set_header_labels(
Variable = "",
p_value = "p-value"
)
doc <- read_docx() |>
body_add_par("Table 2. Continuous Variables by PCOS Status", style = "heading 1") |>
body_add_flextable(ft)
print(doc, target = "Table2_PCOS.docx")Table with stats values:
library(dplyr)
library(purrr)
library(tidyr)
library(flextable)
library(officer)
cont_vars <- c(
"Age", "Weight", "Info_Score", "Emo_Score", "Respect_Score",
"Quality_Communication", "Negative_Communication",
"Overall_Communication", "Trust_Score", "Hirsutism_score"
)
group_var <- "PCOS_Status"
var_labels <- c(
Age = "Age",
Weight = "Weight",
Info_Score = "Information Score",
Emo_Score = "Emotion Score",
Respect_Score = "Respect Score",
Quality_Communication = "Quality Communication",
Negative_Communication = "Negative Communication",
Overall_Communication = "Overall Communication",
Trust_Score = "Trust Score",
Hirsutism_score = "Hirsutism Score"
)
table2 <- map_dfr(cont_vars, function(v) {
df <- PCOS_Data_Scored |>
filter(!is.na(.data[[v]]), !is.na(.data[[group_var]])) |>
mutate(group = as.factor(.data[[group_var]]))
levels_group <- levels(df$group)
test <- t.test(df[[v]] ~ df$group)
# group summaries
sum_stats <- df |>
group_by(group) |>
summarise(
mean = mean(.data[[v]], na.rm = TRUE),
sd = sd(.data[[v]], na.rm = TRUE),
.groups = "drop"
) |>
mutate(
Mean_SD = sprintf("%.2f (%.2f)", mean, sd)
)
# reshape wide
wide <- sum_stats |>
select(group, Mean_SD) |>
pivot_wider(names_from = group, values_from = Mean_SD)
# mean difference (Group1 - Group2)
est <- test$estimate
mean_diff <- unname(est[1] - est[2])
wide |>
mutate(
Variable = v,
Mean_Diff = sprintf("%.2f", mean_diff),
CI = sprintf("(%.2f, %.2f)", test$conf.int[1], test$conf.int[2]),
t_value = sprintf("%.2f", unname(test$statistic)),
p_value = case_when(
is.na(test$p.value) ~ "",
test$p.value < 0.001 ~ "<0.001",
TRUE ~ sprintf("%.3f", test$p.value)
),
.before = 1
)
})
table2 <- table2 |>
mutate(
Variable = recode(Variable, !!!var_labels)
)
ft <- flextable(table2)
ft <- ft |>
theme_booktabs() |>
autofit() |>
align(align = "center", part = "all") |>
bold(j = 1) |>
set_header_labels(
Variable = "",
Mean_Diff = "Mean Difference",
CI = "95% CI",
t_value = "t",
p_value = "p-value"
)
doc <- read_docx() |>
body_add_par("Table 2. Continuous Variables by PCOS Status", style = "heading 1") |>
body_add_par("Values are mean (SD) unless otherwise indicated. Mean difference represents Group 1 minus Group 2.", style = "Normal") |>
body_add_flextable(ft)
print(doc, target = "Table2_PCOS.docx")
table(PCOS_Data_Scored$PCOS_Status)
Non-PCOS PCOS
155 149
table1 <- map_dfr(vars, function(v) {
tab <- table(PCOS_Data_Scored[[v]], PCOS_Data_Scored[[group_var]])
use_fisher <- any(tab < 5)
test <- if (use_fisher) {
fisher.test(tab)
} else {
chisq.test(tab)
}
df <- as.data.frame(tab) |>
rename(Level = Var1, Group = Var2, n = Freq) |>
mutate(
Level = as.character(Level),
Group = as.character(Group)
) |>
group_by(Group) |>
mutate(
pct = round(n / sum(n) * 100, 1),
`n (%)` = paste0(n, " (", pct, "%)")
) |>
ungroup()
df_wide <- df |>
select(Level, Group, `n (%)`) |>
pivot_wider(names_from = Group, values_from = `n (%)`)
df_wide |>
mutate(
Variable = v,
test_type = ifelse(use_fisher, "Fisher", "χ²"),
statistic = ifelse(use_fisher, NA, unname(test$statistic)),
p_value = test$p.value,
.before = 1
)
}) |>
group_by(Variable) |>
mutate(
p_value = ifelse(row_number() == 1, p_value, NA),
test_type = ifelse(row_number() == 1, test_type, NA),
statistic = ifelse(row_number() == 1, statistic, NA)
) |>
ungroup()
table1 <- table1 |>
mutate(
Variable = recode(Variable, !!!var_labels),
Level = paste0(" ", Level),
p_value = case_when(
is.na(p_value) ~ "",
p_value < 0.001 ~ "<0.001",
TRUE ~ sprintf("%.3f", p_value)
),
statistic = case_when(
is.na(statistic) ~ "",
TRUE ~ sprintf("%.2f", statistic)
)
)
ft <- flextable(table1)
ft <- ft |>
autofit() |>
theme_booktabs() |>
align(align = "center", part = "all") |>
bold(i = ~ !duplicated(Variable), j = 1) |>
set_header_labels(
Variable = "",
Level = "",
test_type = "Test",
statistic = "Statistic",
p_value = "p-value"
)
doc <- read_docx() |>
body_add_par("Table 1. Descriptive Characteristics by PCOS Status", style = "heading 1") |>
body_add_par("Values are n (%). χ² = Chi-square test; Fisher = Fisher’s exact test.", style = "Normal") |>
body_add_flextable(ft)
print(doc, target = "Table1.docx")