Trinidad and Tabago Data

Measures:

PPEQ: Picker Patient Experience Questionnaire

PHCPC_F1: Patient-Health Care Provider Communication Scale - Factor 1

PHCPC_F2: Patient-Health Care Provider Communication Scale - Factor 2

HCRT: Health Care Relationship Trust Scale

Perceived Stigma Scale had no data

setwd("C:/Work Files/Collaborations/Kendall PCOS/T and T Project")

Packages Used

library(haven) #reading data from spss, sas, stata
library(tidyverse) #Data management
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library (corrplot) #correlation tables
corrplot 0.95 loaded
library(psych) ## basic psychometrics and statistics

Attaching package: 'psych'

The following objects are masked from 'package:ggplot2':

    %+%, alpha
library(finalfit) #Testing Assumptions
library (performance) #Testing Assumptions
library(MVN) #Assessing multivariate normality

Attaching package: 'MVN'

The following object is masked from 'package:psych':

    mardia
library(knitr) #Making Tables
library(kableExtra) #Making Tables

Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows
library(apaTables) #Generating APA formated tables
library(effectsize)

Attaching package: 'effectsize'

The following object is masked from 'package:psych':

    phi
library(mice)

Attaching package: 'mice'

The following object is masked from 'package:stats':

    filter

The following objects are masked from 'package:base':

    cbind, rbind
library(naniar)

Data Input

PCOS_Data <-read.csv("MBBS_Group_2_Data.csv")
PCOS_Data <-(PCOS_Data[1:304,])
table(PCOS_Data$PCOS_Status)

  0   1 
155 149 
Data Management
PCOS_Data$PPEQ_11 <- as.numeric(PCOS_Data$PPEQ_11)
Warning: NAs introduced by coercion
PCOS_Data$Age <-as.numeric(PCOS_Data$Age)

PCOS_Data$Exercise_per_week <-factor(PCOS_Data$Exercise_per_week,
                                     levels = c(1,2,3,4,5,6),
                                     labels = c("1 day", "2 days", "3 days", "4 days", ">5 days", "none"))

PCOS_Data$Type_of_exercise <- factor(PCOS_Data$Type_of_exercise,
                                     levels = c(1,2,3),
                                     labels = c("Mild", "Moderate", "Strenuous"))

PCOS_Data$Hysterectomy <-factor(PCOS_Data$Hysterectomy,
                                levels = c(1,2),
                                labels = c("Yes", "No"))

PCOS_Data$Education <- factor(PCOS_Data$Education,
                              levels = c(1,2,3,4,5),
                              labels = c("No Education","Primary","Secondary","Tertiary", "Post-Vocational"))

PCOS_Data$Smoking <- factor(PCOS_Data$Smoking,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Alcohol <- factor(PCOS_Data$Alcohol,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Contraceptive <- factor(PCOS_Data$Contraceptive,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Menopause <- factor(PCOS_Data$Menopause,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Acne <- factor(PCOS_Data$Acne,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Stopped_period <- factor(PCOS_Data$Stopped_period,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Alopecia_stage <- factor(PCOS_Data$Alopecia_stage,
                                   levels = c(0,1,2,3),
                                   labels = c("no alopecia", "Stage 1", "Stage 2", "Stage 3"))


PCOS_Data$Insulin_resistance_medications <- factor(PCOS_Data$Insulin_resistance_medications,
                            levels = c(1,2),
                            labels = c("Yes", "No"))                                    

PCOS_Data$Conditions <- factor(PCOS_Data$Conditions,
                               levels = c(1,2,3,4,5,6,7,8,9,10,11,12),
                               labels = c("T2D","H2P","Heart Cond","PCOS","Constipation","Cancer","T2D & H2P", "T2D,H2P,& Cancer", "T2D,H2P,Heart, & Cancer","T2D & Constipation","T2D & Heart","T2D & H2P"))

PCOS_Data$PCOS_Status <- factor(PCOS_Data$PCOS_Status,
                                levels = c(0,1),
                                labels = c("Non-PCOS","PCOS"))

PCOS_Data$Race <- factor(PCOS_Data$Race,
                         levels = c(1,2,3),
                         labels = c("African","East Indian","Mixed"))

PCOS_Data$Exercise_per_day <- factor(PCOS_Data$Exercise_per_day,
                                            levels = c(1,2,3,4,5),
                                            labels = c("< 10 minutes","10-19 minutes", "10-39 minutes","40-59 minutes","> 60 minutes"))
PPEQ Scoring
###INFO_Score


items <- c("PPEQ_1","PPEQ_2","PPEQ_14","PPEQ_15","PPEQ_16")

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(items), ~ ifelse(. == 4, NA, .)))

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(items), ~ 4 - ., .names = "{.col}_r"))

info_items_r <- c("PPEQ_1_r","PPEQ_2_r","PPEQ_14_r","PPEQ_15_r","PPEQ_16_r")

PCOS_Data <- PCOS_Data |>
  mutate(
    Info_Score = ifelse(
      rowSums(!is.na(pick(all_of(info_items_r)))) >= 3,
      rowMeans(pick(all_of(info_items_r)), na.rm = TRUE),
      NA
    )
  )

#### Emotion_Score

emo_items <- c("PPEQ_4","PPEQ_8","PPEQ_9")

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(emo_items), ~ ifelse(. == 4, NA, .)))

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(emo_items), ~ 4 - ., .names = "{.col}_r"))

emo_items_r <- c("PPEQ_4_r","PPEQ_8_r","PPEQ_9_r")

PCOS_Data <- PCOS_Data |>
  mutate(
    Emo_Score = ifelse(
      rowSums(!is.na(pick(all_of(emo_items_r)))) >= 2,
      rowMeans(pick(all_of(emo_items_r)), na.rm = TRUE),
      NA
    )
  )

#### Respect Score

respect_items <- c("PPEQ_5", "PPEQ_7")

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(respect_items), ~ ifelse(. == 4, NA, .)))

PCOS_Data <- PCOS_Data |>
  mutate(
    PPEQ_5_r = 4 - PPEQ_5,
      )

PCOS_Data <- PCOS_Data |>
  mutate(
    Respect_Score = ifelse(
      rowSums(!is.na(pick(PPEQ_5_r, PPEQ_7))) >= 1,
      rowMeans(pick(PPEQ_5_r, PPEQ_7), na.rm = TRUE),
      NA
    )
  )
Patient Healthcare Provider Communication Scale
qc_items <- paste0("PHCPC_F1_", 1:17)

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(qc_items), as.numeric))

PCOS_Data <- PCOS_Data |>
  mutate(
    Quality_Communication = rowSums(
      pick(all_of(qc_items)),
      na.rm = TRUE
    )
  )

nc_items <- paste0("PHCPC_F2_", 1:4)

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(nc_items), as.numeric))

PCOS_Data <- PCOS_Data |>
  mutate(
    Negative_Communication = rowSums(
      pick(all_of(nc_items)),
      na.rm = TRUE
    )
  )

### Overall Communication

range(PCOS_Data$Negative_Communication, na.rm = TRUE)
[1]  4 16
PCOS_Data <- PCOS_Data |>
  mutate(
    Negative_Communication_r = (16 + 4) - Negative_Communication
  )

PCOS_Data <- PCOS_Data |>
  mutate(
    Overall_Communication = Quality_Communication + Negative_Communication_r
  )
Health Care Provider Trust Scale
neg_items <- c("HCRT_1", "HCRT_11", "HCRT_13")



PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(neg_items), ~ 4 - ., .names = "{.col}_r"))

trust_items <- c(
  "HCRT_1_r",
  "HCRT_2", "HCRT_3", "HCRT_4", "HCRT_5",
  "HCRT_6", "HCRT_7", "HCRT_8", "HCRT_9",
  "HCRT_10", "HCRT_11_r", "HCRT_12", "HCRT_13_r",
  "HCRT_14"
)

PCOS_Data <- PCOS_Data |>
  mutate(
    trust_valid = rowSums(!is.na(pick(all_of(trust_items)))),
    Trust_Score = ifelse(
      trust_valid >= 10,
      rowMeans(pick(all_of(trust_items)), na.rm = TRUE),
      NA
    )
  ) |>
  select(-trust_valid)

New Data Set with Scored Scales

PCOS_Data_Scored <-write.csv(PCOS_Data,"PCOS_Data_Scored.csv")
PCOS_Data_Scored <-write_sav(PCOS_Data,"PCOS_Data_Scored.sav")

Descriptive Statistics

describe(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)])
                       vars   n   mean    sd median trimmed   mad    min max
Height                    1 303 161.53 10.10 162.60  162.29  7.41 120.00 190
Weight                    2 303  69.15 15.46  67.00   67.85 11.86   6.00 143
Hirsutism_score           3 304   5.80  6.16   3.00    5.02  4.45   0.00  26
Age                       4 304  29.07  7.39  28.00   28.55  7.41  18.00  45
Emo_Score                 5 294   1.97  0.50   2.00    1.96  0.49   1.00   3
Respect_Score             6 304   1.62  0.53   1.50    1.58  0.74   1.00   3
Quality_Communication     7 304  47.23 11.82  47.00   47.62 11.86  17.00  68
Negative_Communication    8 304   7.26  3.06   6.50    6.82  2.22   4.00  16
Overall_Communication     9 304  59.96 14.02  61.00   60.77 13.34  21.00  84
Trust_Score              10 304   2.61  0.84   2.71    2.67  0.85   0.21   4
                        range  skew kurtosis   se
Height                  70.00 -1.19     3.73 0.58
Weight                 137.00  1.12     3.99 0.89
Hirsutism_score         26.00  0.83    -0.38 0.35
Age                     27.00  0.52    -0.79 0.42
Emo_Score                2.00  0.16    -0.40 0.03
Respect_Score            2.00  0.36    -0.68 0.03
Quality_Communication   51.00 -0.29    -0.46 0.68
Negative_Communication  12.00  1.13     0.78 0.18
Overall_Communication   63.00 -0.48    -0.19 0.80
Trust_Score              3.79 -0.57     0.08 0.05
describe.by(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)],PCOS_Data_Scored$PCOS_Status)
Warning in describe.by(PCOS_Data_Scored[, c(2, 3, 18, 20, 82, 84, 85, 86, :
describe.by is deprecated.  Please use the describeBy function

 Descriptive statistics by group 
group: Non-PCOS
                       vars   n   mean    sd median trimmed   mad    min max
Height                    1 154 160.83 10.55 162.00  161.80  7.04 120.00 190
Weight                    2 155  64.74 11.05  63.00   63.93  8.90  44.50 125
Hirsutism_score           3 155   3.62  4.95   1.00    2.83  1.48   0.00  18
Age                       4 155  29.45  7.55  29.00   28.90  8.90  18.00  45
Emo_Score                 5 146   2.03  0.46   2.00    2.02  0.49   1.00   3
Respect_Score             6 155   1.65  0.57   1.50    1.60  0.74   1.00   3
Quality_Communication     7 155  47.17 12.42  48.00   47.74 11.86  17.00  68
Negative_Communication    8 155   7.14  3.30   6.00    6.58  2.97   4.00  16
Overall_Communication     9 155  60.03 15.00  61.00   61.14 13.34  21.00  84
Trust_Score              10 155   2.58  0.93   2.71    2.66  0.85   0.21   4
                       range  skew kurtosis   se
Height                 70.00 -1.16     3.23 0.85
Weight                 80.50  1.35     4.65 0.89
Hirsutism_score        18.00  1.14    -0.23 0.40
Age                    27.00  0.52    -0.79 0.61
Emo_Score               2.00  0.37    -0.35 0.04
Respect_Score           2.00  0.45    -0.63 0.05
Quality_Communication  51.00 -0.42    -0.40 1.00
Negative_Communication 12.00  1.27     0.87 0.27
Overall_Communication  63.00 -0.65    -0.12 1.20
Trust_Score             3.79 -0.67    -0.15 0.07
------------------------------------------------------------ 
group: PCOS
                       vars   n   mean    sd median trimmed   mad    min   max
Height                    1 149 162.24  9.59 163.00  162.78  7.41 121.00 190.0
Weight                    2 148  73.78 17.91  72.40   72.76 14.68   6.00 143.0
Hirsutism_score           3 149   8.07  6.48   7.00    7.60  8.90   0.00  26.0
Age                       4 149  28.68  7.23  27.00   28.22  7.41  18.00  45.0
Emo_Score                 5 148   1.91  0.53   2.00    1.90  0.49   1.00   3.0
Respect_Score             6 149   1.59  0.48   1.50    1.57  0.74   1.00   2.5
Quality_Communication     7 149  47.28 11.21  47.00   47.45 11.86  20.00  68.0
Negative_Communication    8 149   7.39  2.78   7.00    7.09  2.97   4.00  16.0
Overall_Communication     9 149  59.89 12.98  59.00   60.22 13.34  29.00  84.0
Trust_Score              10 149   2.64  0.74   2.71    2.66  0.74   0.36   4.0
                        range  skew kurtosis   se
Height                  69.00 -1.17     4.21 0.79
Weight                 137.00  0.67     2.87 1.47
Hirsutism_score         26.00  0.49    -0.76 0.53
Age                     27.00  0.50    -0.87 0.59
Emo_Score                2.00  0.11    -0.61 0.04
Respect_Score            1.50  0.11    -1.20 0.04
Quality_Communication   48.00 -0.09    -0.67 0.92
Negative_Communication  12.00  0.91     0.42 0.23
Overall_Communication   55.00 -0.21    -0.52 1.06
Trust_Score              3.64 -0.27    -0.04 0.06
PCOS_Data_Scored_No_NA <- na.omit(PCOS_Data_Scored)

apa.cor.table(PCOS_Data_Scored[,c(2,3,18,20,82,84,85,86,88,92)])


Means, standard deviations, and correlations with confidence intervals
 

  Variable                  M      SD    1            2            3           
  1. Height                 161.53 10.10                                       
                                                                               
  2. Weight                 69.15  15.46 .27**                                 
                                         [.16, .37]                            
                                                                               
  3. Hirsutism_score        5.80   6.16  .05          .19**                    
                                         [-.06, .17]  [.08, .30]               
                                                                               
  4. Age                    29.07  7.39  .06          .16**        -.02        
                                         [-.05, .17]  [.05, .27]   [-.14, .09] 
                                                                               
  5. Emo_Score              1.97   0.50  .14*         -.13*        .15*        
                                         [.02, .25]   [-.24, -.01] [.03, .26]  
                                                                               
  6. Respect_Score          1.62   0.53  -.11         -.05         -.05        
                                         [-.22, .00]  [-.17, .06]  [-.16, .06] 
                                                                               
  7. Quality_Communication  47.23  11.82 .17**        -.04         .11*        
                                         [.06, .28]   [-.15, .07]  [.00, .22]  
                                                                               
  8. Negative_Communication 7.26   3.06  -.13*        .04          -.16**      
                                         [-.24, -.02] [-.08, .15]  [-.27, -.05]
                                                                               
  9. Overall_Communication  59.96  14.02 .18**        -.04         .13*        
                                         [.06, .28]   [-.15, .07]  [.02, .24]  
                                                                               
  10. Trust_Score           2.61   0.84  .15**        -.04         .19**       
                                         [.04, .26]   [-.15, .08]  [.08, .30]  
                                                                               
  4           5            6            7            8            9         
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
  .03                                                                       
  [-.09, .14]                                                               
                                                                            
  .15**       -.21**                                                        
  [.04, .26]  [-.32, -.10]                                                  
                                                                            
  -.08        .41**        -.54**                                           
  [-.19, .04] [.31, .50]   [-.62, -.46]                                     
                                                                            
  .15**       -.23**       .42**        -.66**                              
  [.04, .26]  [-.34, -.12] [.32, .51]   [-.72, -.59]                        
                                                                            
  -.10        .40**        -.55**       .99**        -.77**                 
  [-.21, .02] [.29, .49]   [-.62, -.46] [.98, .99]   [-.81, -.72]           
                                                                            
  -.11        .34**        -.54**       .85**        -.72**       .88**     
  [-.22, .00] [.23, .44]   [-.62, -.46] [.82, .88]   [-.77, -.66] [.85, .90]
                                                                            

Note. M and SD are used to represent mean and standard deviation, respectively.
Values in square brackets indicate the 95% confidence interval.
The confidence interval is a plausible range of population correlations 
that could have caused the sample correlation (Cumming, 2014).
 * indicates p < .05. ** indicates p < .01.
 
res <- cor(PCOS_Data_Scored_No_NA[,c(2,3,18,20,82,84,85,86,88,92)])
round(res, 2)
                       Height Weight Hirsutism_score   Age Emo_Score
Height                   1.00  -0.27            0.10  0.22      0.17
Weight                  -0.27   1.00            0.16  0.19     -0.24
Hirsutism_score          0.10   0.16            1.00  0.12      0.17
Age                      0.22   0.19            0.12  1.00     -0.20
Emo_Score                0.17  -0.24            0.17 -0.20      1.00
Respect_Score            0.35  -0.48            0.33  0.41     -0.01
Quality_Communication   -0.11   0.15            0.13  0.05      0.37
Negative_Communication   0.17  -0.04           -0.30  0.16     -0.35
Overall_Communication   -0.15   0.16            0.21  0.01      0.46
Trust_Score              0.01   0.01            0.10 -0.14      0.50
                       Respect_Score Quality_Communication
Height                          0.35                 -0.11
Weight                         -0.48                  0.15
Hirsutism_score                 0.33                  0.13
Age                             0.41                  0.05
Emo_Score                      -0.01                  0.37
Respect_Score                   1.00                 -0.12
Quality_Communication          -0.12                  1.00
Negative_Communication          0.09                  0.03
Overall_Communication          -0.14                  0.96
Trust_Score                     0.01                  0.70
                       Negative_Communication Overall_Communication Trust_Score
Height                                   0.17                 -0.15        0.01
Weight                                  -0.04                  0.16        0.01
Hirsutism_score                         -0.30                  0.21        0.10
Age                                      0.16                  0.01       -0.14
Emo_Score                               -0.35                  0.46        0.50
Respect_Score                            0.09                 -0.14        0.01
Quality_Communication                    0.03                  0.96        0.70
Negative_Communication                   1.00                 -0.24       -0.19
Overall_Communication                   -0.24                  1.00        0.74
Trust_Score                             -0.19                  0.74        1.00
corrplot(res,
         type = "lower",
         order = "hclust",
         tl.col = "black",
         tl.srt = 45,
         addCoef.col = "black",
         sig.level = 0.05,
         insig = "blank")

png("correlation_plot.png", width = 2000, height = 2000, res = 300)

corrplot(res,
         type = "lower",
         order = "hclust",
         tl.col = "black",
         tl.srt = 45,
         addCoef.col = "black",
         number.cex = 0.6,
        
         insig = "blank")

dev.off()
png 
  2 
describe(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)])%>%
  knitr::kable(digits = 3, format="html", booktabs=TRUE, caption="Table 1. Descriptives")%>%
  kable_classic(full_width = F, html_font = "Cambria")
Table 1. Descriptives
vars n mean sd median trimmed mad min max range skew kurtosis se
Height 1 303 161.526 10.099 162.600 162.291 7.413 120.000 190 70.000 -1.185 3.731 0.580
Weight 2 303 69.154 15.456 67.000 67.854 11.861 6.000 143 137.000 1.121 3.991 0.888
Hirsutism_score 3 304 5.803 6.161 3.000 5.025 4.448 0.000 26 26.000 0.831 -0.376 0.353
Age 4 304 29.069 7.395 28.000 28.549 7.413 18.000 45 27.000 0.516 -0.794 0.424
Emo_Score 5 294 1.970 0.499 2.000 1.963 0.494 1.000 3 2.000 0.162 -0.400 0.029
Respect_Score 6 304 1.625 0.532 1.500 1.580 0.741 1.000 3 2.000 0.361 -0.679 0.031
Quality_Communication 7 304 47.227 11.821 47.000 47.623 11.861 17.000 68 51.000 -0.290 -0.462 0.678
Negative_Communication 8 304 7.263 3.057 6.500 6.824 2.224 4.000 16 12.000 1.131 0.782 0.175
Overall_Communication 9 304 59.964 14.022 61.000 60.770 13.343 21.000 84 63.000 -0.483 -0.189 0.804
Trust_Score 10 304 2.611 0.841 2.714 2.665 0.847 0.214 4 3.786 -0.569 0.081 0.048
random <- rnorm(nrow(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)]), 7)
#The command above generates a random variable with the same number of rows (values)as the dataset
hist(random)#just to check the distribtuion of this new variable

fakereg <-lm(random ~., data = PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)])
##runs a regression with the new random variable as the dv and all the variables in the dataset as IVs
##This generates a set of residuals in order to check the assumptions

##The following set of code just scales the residuals
standardized <- rstudent(fakereg)
fitted <- scale(fakereg$fitted.values)
hist(fitted)

check_model (fakereg) 

MVN::mvn(
  PCOS_Data_Scored_No_NA[, c(2,3,18,20,82,84,85,86,88,92)]
)
$multivariate_normality
           Test Statistic p.value     Method          MVN
1 Henze-Zirkler     1.006   0.002 asymptotic ✗ Not normal

$univariate_normality
               Test               Variable Statistic p.value    Normality
1  Anderson-Darling                 Height     0.297    0.56     ✓ Normal
2  Anderson-Darling                 Weight     1.521  <0.001 ✗ Not normal
3  Anderson-Darling        Hirsutism_score     1.014   0.009 ✗ Not normal
4  Anderson-Darling                    Age     1.163   0.004 ✗ Not normal
5  Anderson-Darling              Emo_Score     0.779   0.036 ✗ Not normal
6  Anderson-Darling          Respect_Score     1.058   0.007 ✗ Not normal
7  Anderson-Darling  Quality_Communication     0.408   0.318     ✓ Normal
8  Anderson-Darling Negative_Communication     0.982   0.011 ✗ Not normal
9  Anderson-Darling  Overall_Communication     0.281   0.605     ✓ Normal
10 Anderson-Darling            Trust_Score     0.565   0.126     ✓ Normal

$descriptives
                 Variable  n    Mean Std.Dev  Median     Min     Max    25th
1                  Height 22 165.864   5.676 165.000 157.000 178.000 162.250
2                  Weight 22  74.286  12.689  71.900  62.000 113.600  66.250
3         Hirsutism_score 22   7.636   5.551   9.000   0.000  15.000   2.250
4                     Age 22  34.682   8.191  39.000  19.000  45.000  29.250
5               Emo_Score 22   2.197   0.456   2.000   1.333   3.000   2.000
6           Respect_Score 22   1.705   0.549   1.750   1.000   2.500   1.125
7   Quality_Communication 22  52.727   7.598  52.000  40.000  68.000  45.500
8  Negative_Communication 22   6.591   2.153   6.000   4.000  12.000   5.000
9   Overall_Communication 22  66.136   7.827  64.500  53.000  84.000  59.500
10            Trust_Score 22   2.981   0.555   2.857   2.000   3.786   2.589
      75th   Skew Kurtosis
1  170.000  0.334    2.465
2   77.750  1.714    5.562
3   12.750 -0.266    1.489
4   41.000 -0.697    2.093
5    2.333  0.319    2.481
6    2.000  0.016    1.725
7   57.000  0.256    2.366
8    7.750  1.045    3.356
9   72.000  0.311    2.478
10   3.554  0.047    1.795

$data
    Height Weight Hirsutism_score Age Emo_Score Respect_Score
26     163   63.0               3  19  2.666667           1.0
93     170   67.0               0  21  3.000000           1.0
106    172   68.0               0  37  1.666667           2.0
110    168   63.0               6  39  2.000000           1.5
116    163   78.0               0  39  2.333333           1.5
118    165   78.0              13  42  2.000000           1.5
119    164   68.0               7  41  1.333333           2.5
129    162   78.0              11  32  2.000000           2.0
137    178   66.0               9  41  2.333333           2.5
138    160   64.0              12  39  2.333333           2.5
139    170   72.0              11  42  2.666667           2.5
140    167   77.0              13  30  3.000000           2.0
148    170   62.0              12  40  3.000000           2.0
153    160   72.0              14  24  2.000000           2.0
155    169   77.0              15  42  2.333333           1.5
169    176   67.0              14  27  2.000000           2.0
221    160   72.7               2  20  1.666667           1.0
223    165   71.8               0  45  2.000000           2.0
224    170  113.6               9  42  1.666667           1.0
229    157   95.9              13  29  2.333333           1.0
246    163   65.0               0  32  2.000000           1.5
256    157   95.3               4  40  2.000000           1.0
    Quality_Communication Negative_Communication Overall_Communication
26                     61                      6                    75
93                     44                      6                    58
106                    51                      5                    66
110                    47                      6                    61
116                    51                      6                    65
118                    51                      7                    64
119                    40                      7                    53
129                    44                      5                    59
137                    53                      9                    64
138                    56                      5                    71
139                    57                      5                    72
140                    59                      5                    74
148                    56                      4                    72
153                    51                      8                    63
155                    58                      5                    73
169                    44                      9                    55
221                    44                      5                    59
223                    56                     12                    64
224                    57                      8                    69
229                    68                      4                    84
246                    67                     11                    76
256                    45                      7                    58
    Trust_Score
26     3.785714
93     2.571429
106    2.857143
110    2.642857
116    2.928571
118    2.857143
119    2.142857
129    2.714286
137    3.142857
138    3.500000
139    3.571429
140    3.785714
148    3.642857
153    2.428571
155    2.000000
169    2.714286
221    2.714286
223    2.571429
224    3.357143
229    3.714286
246    3.571429
256    2.357143

$subset
NULL

$outlierMethod
[1] "none"

attr(,"class")
[1] "mvn"
vis_miss(PCOS_Data_Scored)

gg_miss_var(PCOS_Data_Scored)

md.pattern(PCOS_Data_Scored[,c(2,3,18,20,82,84,85,86,88,92)])

    Hirsutism_score Age Respect_Score Quality_Communication
292               1   1             1                     1
10                1   1             1                     1
1                 1   1             1                     1
1                 1   1             1                     1
                  0   0             0                     0
    Negative_Communication Overall_Communication Trust_Score Height Weight
292                      1                     1           1      1      1
10                       1                     1           1      1      1
1                        1                     1           1      1      0
1                        1                     1           1      0      1
                         0                     0           0      1      1
    Emo_Score   
292         1  0
10          0  1
1           1  1
1           1  1
           10 12

Analyses

### Age 

str(PCOS_Data$PCOS_Status)
 Factor w/ 2 levels "Non-PCOS","PCOS": 2 2 2 2 2 2 2 2 2 2 ...
Age_model <- aov(Age ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Age ~ PCOS_Status, data = PCOS_Data_Scored)

    Welch Two Sample t-test

data:  Age by PCOS_Status
t = 0.9049, df = 302, p-value = 0.3662
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -0.9013285  2.4359464
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              29.44516               28.67785 
summary(Age_model)
             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1     45   44.73   0.817  0.367
Residuals   302  16525   54.72               
cohens_d(
  Age ~ PCOS_Status,
  data = PCOS_Data_Scored
)
Cohen's d |        95% CI
-------------------------
0.10      | [-0.12, 0.33]

- Estimated using pooled SD.
PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_age = mean(Age, na.rm = TRUE),
    sd_age = sd(Age, na.rm = TRUE)
  )
# A tibble: 2 × 4
  PCOS_Status     n mean_age sd_age
  <fct>       <int>    <dbl>  <dbl>
1 Non-PCOS      155     29.4   7.55
2 PCOS          149     28.7   7.23
ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Age)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  labs(
    title = "Age by PCOS Status",
    x = "PCOS Status",
    y = "Age"
  )

### Weight 

Weight_model <- aov(Weight ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Weight ~ PCOS_Status, data = PCOS_Data_Scored)

    Welch Two Sample t-test

data:  Weight by PCOS_Status
t = -5.2563, df = 242.71, p-value = 3.22e-07
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -12.422722  -5.650041
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              64.74065               73.77703 
summary(Weight_model)
             Df Sum Sq Mean Sq F value   Pr(>F)    
PCOS_Status   1   6182    6182   28.21 2.12e-07 ***
Residuals   301  65965     219                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
1 observation deleted due to missingness
cohens_d(
  Weight ~ PCOS_Status,
  data = PCOS_Data_Scored
)
Warning: Missing values detected. NAs dropped.
Cohen's d |         95% CI
--------------------------
-0.61     | [-0.84, -0.38]

- Estimated using pooled SD.
PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Weight = mean(Weight, na.rm = TRUE),
    sd_Weight = sd(Weight, na.rm = TRUE)
  )
# A tibble: 2 × 4
  PCOS_Status     n mean_Weight sd_Weight
  <fct>       <int>       <dbl>     <dbl>
1 Non-PCOS      155        64.7      11.1
2 PCOS          149        73.8      17.9
plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Weight), is.finite(Weight), !is.na(PCOS_Status))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Weight)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  labs(
    title = "Weight by PCOS Status",
    x = "PCOS Status",
    y = "Weight"
  )
Warning: Removed 1 row containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

#### PPEQ Info Score


Info_Score_model <- aov(Info_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Info_Score ~ PCOS_Status, data = PCOS_Data_Scored)

    Welch Two Sample t-test

data:  Info_Score by PCOS_Status
t = -0.030664, df = 294.03, p-value = 0.9756
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -0.1169905  0.1134007
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              2.022478               2.024273 
cohens_d(
 Info_Score ~ PCOS_Status,
  data = PCOS_Data_Scored
)
Warning: Missing values detected. NAs dropped.
Cohen's d |        95% CI
-------------------------
-3.53e-03 | [-0.23, 0.22]

- Estimated using pooled SD.
summary(Info_Score_model)
             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1   0.00 0.00024   0.001  0.976
Residuals   299  77.31 0.25856               
3 observations deleted due to missingness
PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Info_Score = mean(Info_Score, na.rm = TRUE),
    sd_Info_Score = sd(Info_Score, na.rm = TRUE)
  )
# A tibble: 2 × 4
  PCOS_Status     n mean_Info_Score sd_Info_Score
  <fct>       <int>           <dbl>         <dbl>
1 Non-PCOS      155            2.02         0.545
2 PCOS          149            2.02         0.469
plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Info_Score), is.finite(Info_Score), !is.na(Info_Score))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Info_Score)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  labs(
    title = "Info_Score by PCOS Status",
    x = "PCOS Status",
    y = "Info_Score"
  )
Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_point()`).

#### PPEQ Emotion Score


Emo_Score_model <- aov(Emo_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Emo_Score ~ PCOS_Status, data = PCOS_Data_Scored)

    Welch Two Sample t-test

data:  Emo_Score by PCOS_Status
t = 2.2102, df = 288.11, p-value = 0.02788
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 0.0139822 0.2414479
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              2.034247               1.906532 
cohens_d(
  Emo_Score ~ PCOS_Status,
  data = PCOS_Data_Scored
)
Warning: Missing values detected. NAs dropped.
Cohen's d |       95% CI
------------------------
0.26      | [0.03, 0.49]

- Estimated using pooled SD.
summary(Emo_Score_model)
             Df Sum Sq Mean Sq F value Pr(>F)  
PCOS_Status   1   1.20  1.1988   4.876  0.028 *
Residuals   292  71.79  0.2458                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
10 observations deleted due to missingness
PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Emo_Score = mean(Emo_Score, na.rm = TRUE),
    sd_Emo_Score = sd(Emo_Score, na.rm = TRUE)
  )
# A tibble: 2 × 4
  PCOS_Status     n mean_Emo_Score sd_Emo_Score
  <fct>       <int>          <dbl>        <dbl>
1 Non-PCOS      155           2.03        0.462
2 PCOS          149           1.91        0.527
plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Emo_Score), is.finite(Emo_Score), !is.na(Emo_Score))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Emo_Score)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  coord_cartesian(ylim = c(0, 4)) +
  labs(
    title = "Emotion_Score by PCOS Status",
    x = "PCOS Status",
    y = "Emo_Score"
  )
Warning: Removed 10 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 10 rows containing missing values or values outside the scale range
(`geom_point()`).

#### Respect Score

Respect_Score_model <- aov(Respect_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Respect_Score ~ PCOS_Status, data = PCOS_Data_Scored)

    Welch Two Sample t-test

data:  Respect_Score by PCOS_Status
t = 1.0008, df = 296.98, p-value = 0.3177
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -0.05883484  0.18059280
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              1.654839               1.593960 
cohens_d(
  Respect_Score ~ PCOS_Status,
  data = PCOS_Data_Scored
)
Cohen's d |        95% CI
-------------------------
0.11      | [-0.11, 0.34]

- Estimated using pooled SD.
summary(Respect_Score_model)
             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1   0.28  0.2816   0.995  0.319
Residuals   302  85.47  0.2830               
PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Respect_Score = mean(Respect_Score, na.rm = TRUE),
    sd_Respect_Score = sd(Respect_Score, na.rm = TRUE)
  )
# A tibble: 2 × 4
  PCOS_Status     n mean_Respect_Score sd_Respect_Score
  <fct>       <int>              <dbl>            <dbl>
1 Non-PCOS      155               1.65            0.574
2 PCOS          149               1.59            0.484
plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Respect_Score), is.finite(Respect_Score), !is.na(Respect_Score))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Respect_Score)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  coord_cartesian(ylim = c(0, 4)) +
  labs(
    title = "Respect_Score by PCOS Status",
    x = "PCOS Status",
    y = "Respect_Score"
  )

#### Quality Communication

Quality_Communication_Score_model <- aov(Quality_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Quality_Communication ~ PCOS_Status, data = PCOS_Data_Scored)

    Welch Two Sample t-test

data:  Quality_Communication by PCOS_Status
t = -0.07943, df = 300.81, p-value = 0.9367
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -2.775589  2.560218
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              47.17419               47.28188 
cohens_d(
  Quality_Communication ~ PCOS_Status,
  data = PCOS_Data_Scored
)
Cohen's d |        95% CI
-------------------------
-9.09e-03 | [-0.23, 0.22]

- Estimated using pooled SD.
summary(Quality_Communication_Score_model)
             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1      1    0.88   0.006  0.937
Residuals   302  42340  140.20               
PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Quality_Communication = mean(Quality_Communication, na.rm = TRUE),
    sd_Quality_Communication = sd(Quality_Communication, na.rm = TRUE)
  )
# A tibble: 2 × 4
  PCOS_Status     n mean_Quality_Communication sd_Quality_Communication
  <fct>       <int>                      <dbl>                    <dbl>
1 Non-PCOS      155                       47.2                     12.4
2 PCOS          149                       47.3                     11.2
plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Quality_Communication), is.finite(Quality_Communication), !is.na(Quality_Communication))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Quality_Communication)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  coord_cartesian(ylim = c(15, 70)) +
  labs(
    title = "Quality_Communication_Score by PCOS Status",
    x = "PCOS Status",
    y = "Quality_Communication_Score"
  )

##### Negative Communication

Negative_Communication_Score_model <- aov(Negative_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Negative_Communication ~ PCOS_Status, data = PCOS_Data_Scored)

    Welch Two Sample t-test

data:  Negative_Communication by PCOS_Status
t = -0.70705, df = 296.89, p-value = 0.4801
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -0.9357275  0.4410750
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              7.141935               7.389262 
cohens_d(
  Negative_Communication ~ PCOS_Status,
  data = PCOS_Data_Scored
)
Cohen's d |        95% CI
-------------------------
-0.08     | [-0.31, 0.14]

- Estimated using pooled SD.
summary(Negative_Communication_Score_model)
             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1    4.6   4.647   0.497  0.482
Residuals   302 2826.3   9.359               
PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Negative_Communication = mean(Negative_Communication, na.rm = TRUE),
    sd_Negative_Communication = sd(Negative_Communication, na.rm = TRUE)
  )
# A tibble: 2 × 4
  PCOS_Status     n mean_Negative_Communication sd_Negative_Communication
  <fct>       <int>                       <dbl>                     <dbl>
1 Non-PCOS      155                        7.14                      3.30
2 PCOS          149                        7.39                      2.78
plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Negative_Communication), is.finite (Negative_Communication), !is.na(Negative_Communication))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Negative_Communication)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  coord_cartesian(ylim = c(3, 18)) +
  labs(
    title = "Negative_Communication_Score by PCOS Status",
    x = "PCOS Status",
    y = "Negative_Communication_Score"
  )

#### Overall Communicataion

Overall_Communication_Score_model <- aov(Overall_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Overall_Communication ~ PCOS_Status, data = PCOS_Data_Scored)

    Welch Two Sample t-test

data:  Overall_Communication by PCOS_Status
t = 0.086906, df = 298.77, p-value = 0.9308
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -3.022427  3.301708
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              60.03226               59.89262 
cohens_d(
  Overall_Communication ~ PCOS_Status,
  data = PCOS_Data_Scored
)
Cohen's d |        95% CI
-------------------------
9.94e-03  | [-0.21, 0.23]

- Estimated using pooled SD.
summary(Overall_Communication_Score_model)
             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1      1    1.48   0.008  0.931
Residuals   302  59571  197.26               
PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Overall_Communication = mean(Overall_Communication, na.rm = TRUE),
    sd_Overall_Communication = sd(Overall_Communication, na.rm = TRUE)
  )
# A tibble: 2 × 4
  PCOS_Status     n mean_Overall_Communication sd_Overall_Communication
  <fct>       <int>                      <dbl>                    <dbl>
1 Non-PCOS      155                       60.0                     15.0
2 PCOS          149                       59.9                     13.0
plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Overall_Communication), is.finite (Overall_Communication), !is.na(Overall_Communication))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Overall_Communication)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  coord_cartesian(ylim = c(19, 85)) +
  labs(
    title = "Overall_Communication by PCOS Status",
    x = "PCOS Status",
    y = "Overall_Communication_Score"
  )

#### Trust Score

Trust_Score_model <- aov(Trust_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Trust_Score ~ PCOS_Status, data = PCOS_Data_Scored)

    Welch Two Sample t-test

data:  Trust_Score by PCOS_Status
t = -0.57959, df = 292.87, p-value = 0.5626
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -0.2451808  0.1336249
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              2.583871               2.639649 
cohens_d(
  Trust_Score ~ PCOS_Status,
  data = PCOS_Data_Scored
)
Cohen's d |        95% CI
-------------------------
-0.07     | [-0.29, 0.16]

- Estimated using pooled SD.
summary(Trust_Score_model)
             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1   0.24  0.2364   0.333  0.564
Residuals   302 214.31  0.7096               
PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Trust_Score = mean(Trust_Score, na.rm = TRUE),
    sd_Trust_Score = sd(Trust_Score, na.rm = TRUE)
  )
# A tibble: 2 × 4
  PCOS_Status     n mean_Trust_Score sd_Trust_Score
  <fct>       <int>            <dbl>          <dbl>
1 Non-PCOS      155             2.58          0.927
2 PCOS          149             2.64          0.745
plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Trust_Score), is.finite (Trust_Score), !is.na(Trust_Score))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Trust_Score)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
    labs(
    title = "Trust_Score by PCOS Status",
    x = "PCOS Status",
    y = "Trust_Score"
  )

#### Hirsutism

Hirsutism_score_model <- aov(Hirsutism_score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Hirsutism_score ~ PCOS_Status, data = PCOS_Data_Scored)

    Welch Two Sample t-test

data:  Hirsutism_score by PCOS_Status
t = -6.7147, df = 276.98, p-value = 1.065e-10
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -5.760408 -3.148533
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              3.619355               8.073826 
cohens_d(
  Hirsutism_score ~ PCOS_Status,
  data = PCOS_Data_Scored
)
Cohen's d |         95% CI
--------------------------
-0.77     | [-1.01, -0.54]

- Estimated using pooled SD.
summary(Hirsutism_score_model)
             Df Sum Sq Mean Sq F value   Pr(>F)    
PCOS_Status   1   1507  1507.4   45.56 7.58e-11 ***
Residuals   302   9993    33.1                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Hirsutism_score = mean(Hirsutism_score, na.rm = TRUE),
    sd_Hirsutism_score = sd(Hirsutism_score, na.rm = TRUE)
  )
# A tibble: 2 × 4
  PCOS_Status     n mean_Hirsutism_score sd_Hirsutism_score
  <fct>       <int>                <dbl>              <dbl>
1 Non-PCOS      155                 3.62               4.95
2 PCOS          149                 8.07               6.48
plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Hirsutism_score), is.finite (Hirsutism_score), !is.na(Hirsutism_score))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Hirsutism_score)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
    labs(
    title = "Hirsutism score by PCOS Status",
    x = "PCOS Status",
    y = "Hirsutism score"
  )

\(\chi^2 Analyses\)

vars <- c("Education", "Smoking", "Alcohol", "Exercise_per_day", "Exercise_per_week","Type_of_exercise","Conditions","Hysterectomy","Contraceptive","Alopecia_stage","Acne","Insulin_resistance_medications","Menopause","Stopped_period","Race")



fisher_results <- lapply(vars, function(v) {
  
  tab <- table(PCOS_Data_Scored[[v]], PCOS_Data_Scored$PCOS_Status)
  
  # column percentages (within PCOS group)
  prop_tab <- prop.table(tab, margin = 2) * 100
  
  test <- fisher.test(tab)
  
  data.frame(
    Variable = v,
    p_value = test$p.value,
    levels = nrow(tab),
    pct_table = paste(round(prop_tab, 1), collapse = " | ")
  )
})

fisher_table <- do.call(rbind, fisher_results)

fisher_table
                         Variable      p_value levels
1                       Education 3.245780e-01      5
2                         Smoking 2.785361e-02      2
3                         Alcohol 5.727535e-02      2
4                Exercise_per_day 6.586983e-02      5
5               Exercise_per_week 4.685501e-01      6
6                Type_of_exercise 5.127177e-01      3
7                      Conditions 7.371856e-01     11
8                    Hysterectomy 2.146467e-01      2
9                   Contraceptive 3.119180e-13      2
10                 Alopecia_stage 7.041877e-12      4
11                           Acne 6.634508e-07      2
12 Insulin_resistance_medications 6.218387e-14      2
13                      Menopause 2.832929e-01      2
14                 Stopped_period 1.000000e+00      2
15                           Race 4.847089e-01      3
                                                                                                                   pct_table
1                                                                    0.6 | 0.6 | 29 | 65.2 | 4.5 | 0 | 0 | 37.6 | 57.7 | 4.7
2                                                                                                  21.3 | 78.7 | 32.9 | 67.1
3                                                                                                   86.3 | 13.7 | 93.3 | 6.7
4                                                           18.1 | 19.4 | 32.9 | 8.4 | 21.3 | 10.1 | 16.8 | 47 | 10.1 | 16.1
5                                                10.3 | 26.5 | 19.4 | 7.1 | 5.8 | 31 | 6.7 | 28.2 | 25.5 | 10.1 | 5.4 | 24.2
6                                                                                        49.5 | 41.1 | 9.3 | 46 | 47.8 | 6.2
7  21.4 | 42.9 | 7.1 | 0 | 7.1 | 0 | 7.1 | 7.1 | 0 | 0 | 7.1 | 10.7 | 39.3 | 7.1 | 0 | 14.3 | 3.6 | 17.9 | 0 | 3.6 | 3.6 | 0
8                                                                                                    3.2 | 96.8 | 0.7 | 99.3
9                                                                                                   8.4 | 91.6 | 44.3 | 55.7
10                                                                               91 | 7.7 | 1.3 | 0 | 56.4 | 33.6 | 10.1 | 0
11                                                                                                 40.6 | 59.4 | 69.1 | 30.9
12                                                                                                  2.6 | 97.4 | 34.2 | 65.8
13                                                                                                   3.9 | 96.1 | 1.3 | 98.7
14                                                                                                       3.9 | 96.1 | 4 | 96
15                                                                                   16.8 | 69.7 | 13.5 | 21.5 | 67.8 | 10.7
desc_table <- lapply(vars, function(v) {
  
  PCOS_Data_Scored |>
    count(Level = .data[[v]]) |>
    mutate(
      Variable = v,
      pct = round(n / sum(n) * 100, 1),
      `n (%)` = paste0(n, " (", pct, "%)")
    ) |>
    select(Variable, Level, `n (%)`)
  
}) |>
  bind_rows()

desc_table
                         Variable                   Level       n (%)
1                       Education            No Education    1 (0.3%)
2                       Education                 Primary    1 (0.3%)
3                       Education               Secondary 101 (33.2%)
4                       Education                Tertiary 187 (61.5%)
5                       Education         Post-Vocational   14 (4.6%)
6                         Smoking                     Yes    82 (27%)
7                         Smoking                      No   222 (73%)
8                         Alcohol                     Yes 271 (89.1%)
9                         Alcohol                      No  31 (10.2%)
10                        Alcohol                    <NA>    2 (0.7%)
11               Exercise_per_day            < 10 minutes  43 (14.1%)
12               Exercise_per_day           10-19 minutes  55 (18.1%)
13               Exercise_per_day           10-39 minutes 121 (39.8%)
14               Exercise_per_day           40-59 minutes   28 (9.2%)
15               Exercise_per_day            > 60 minutes  57 (18.8%)
16              Exercise_per_week                   1 day   26 (8.6%)
17              Exercise_per_week                  2 days  83 (27.3%)
18              Exercise_per_week                  3 days  68 (22.4%)
19              Exercise_per_week                  4 days   26 (8.6%)
20              Exercise_per_week                 >5 days   17 (5.6%)
21              Exercise_per_week                    none  84 (27.6%)
22               Type_of_exercise                    Mild 105 (34.5%)
23               Type_of_exercise                Moderate  98 (32.2%)
24               Type_of_exercise               Strenuous   17 (5.6%)
25               Type_of_exercise                    <NA>  84 (27.6%)
26                     Conditions                     T2D      6 (2%)
27                     Conditions                     H2P   17 (5.6%)
28                     Conditions              Heart Cond      3 (1%)
29                     Conditions            Constipation    5 (1.6%)
30                     Conditions                  Cancer    1 (0.3%)
31                     Conditions               T2D & H2P      6 (2%)
32                     Conditions        T2D,H2P,& Cancer    1 (0.3%)
33                     Conditions T2D,H2P,Heart, & Cancer    1 (0.3%)
34                     Conditions      T2D & Constipation    1 (0.3%)
35                     Conditions             T2D & Heart    1 (0.3%)
36                     Conditions                    <NA> 262 (86.2%)
37                   Hysterectomy                     Yes      6 (2%)
38                   Hysterectomy                      No   298 (98%)
39                  Contraceptive                     Yes    79 (26%)
40                  Contraceptive                      No   225 (74%)
41                 Alopecia_stage             no alopecia   225 (74%)
42                 Alopecia_stage                 Stage 1  62 (20.4%)
43                 Alopecia_stage                 Stage 2   17 (5.6%)
44                           Acne                     Yes 166 (54.6%)
45                           Acne                      No 138 (45.4%)
46 Insulin_resistance_medications                     Yes  55 (18.1%)
47 Insulin_resistance_medications                      No 249 (81.9%)
48                      Menopause                     Yes    8 (2.6%)
49                      Menopause                      No 296 (97.4%)
50                 Stopped_period                     Yes   12 (3.9%)
51                 Stopped_period                      No 292 (96.1%)
52                           Race                 African  58 (19.1%)
53                           Race             East Indian 209 (68.8%)
54                           Race                   Mixed  37 (12.2%)
table(PCOS_Data_Scored$PCOS_Status)

Non-PCOS     PCOS 
     155      149 
library(dplyr)

vars <- c(
  "Education",
  "Smoking",
  "Alcohol",
  "Exercise_per_day",
  "Exercise_per_week",
  "Type_of_exercise",
  "Conditions",
  "Hysterectomy",
  "Contraceptive",
  "Alopecia_stage",
  "Acne",
  "Insulin_resistance_medications",
  "Menopause",
  "Stopped_period",
  "Race"
)


fisher_results <- lapply(vars, function(v) {
  
  tab <- table(
    PCOS_Data_Scored[[v]],
    PCOS_Data_Scored$PCOS_Status
  )
  
  prop_tab <- round(prop.table(tab, margin = 2) * 100, 1)
  
  test <- fisher.test(tab)
  
  OR <- NA
  LCL <- NA
  UCL <- NA
  OR_CI <- NA
  
  if (all(dim(tab) == c(2, 2))) {
    
    OR  <- unname(test$estimate)
    LCL <- test$conf.int[1]
    UCL <- test$conf.int[2]
    
    OR_CI <- sprintf("%.2f (%.2f, %.2f)", OR, LCL, UCL)
  }
  
  data.frame(
    Variable = v,
    Levels = nrow(tab),
    p_value = round(test$p.value, 4),
    Odds_Ratio = round(OR, 2),
    CI_Lower = round(LCL, 2),
    CI_Upper = round(UCL, 2),
    `OR (95% CI)` = OR_CI,
    pct_table = paste(prop_tab, collapse = " | "),
    stringsAsFactors = FALSE
  )
  
})

fisher_table <- bind_rows(fisher_results)

fisher_table
                         Variable Levels p_value Odds_Ratio CI_Lower CI_Upper
1                       Education      5  0.3246         NA       NA       NA
2                         Smoking      2  0.0279       0.55     0.32     0.95
3                         Alcohol      2  0.0573       0.45     0.18     1.05
4                Exercise_per_day      5  0.0659         NA       NA       NA
5               Exercise_per_week      6  0.4686         NA       NA       NA
6                Type_of_exercise      3  0.5127         NA       NA       NA
7                      Conditions     11  0.7372         NA       NA       NA
8                    Hysterectomy      2  0.2146       4.91     0.54   234.62
9                   Contraceptive      2  0.0000       0.12     0.06     0.23
10                 Alopecia_stage      4  0.0000         NA       NA       NA
11                           Acne      2  0.0000       0.31     0.19     0.50
12 Insulin_resistance_medications      2  0.0000       0.05     0.01     0.15
13                      Menopause      2  0.2833       2.95     0.52    30.35
14                 Stopped_period      2  1.0000       0.96     0.25     3.68
15                           Race      3  0.4847         NA       NA       NA
           OR..95..CI.
1                 <NA>
2    0.55 (0.32, 0.95)
3    0.45 (0.18, 1.05)
4                 <NA>
5                 <NA>
6                 <NA>
7                 <NA>
8  4.91 (0.54, 234.62)
9    0.12 (0.06, 0.23)
10                <NA>
11   0.31 (0.19, 0.50)
12   0.05 (0.01, 0.15)
13  2.95 (0.52, 30.35)
14   0.96 (0.25, 3.68)
15                <NA>
                                                                                                                   pct_table
1                                                                    0.6 | 0.6 | 29 | 65.2 | 4.5 | 0 | 0 | 37.6 | 57.7 | 4.7
2                                                                                                  21.3 | 78.7 | 32.9 | 67.1
3                                                                                                   86.3 | 13.7 | 93.3 | 6.7
4                                                           18.1 | 19.4 | 32.9 | 8.4 | 21.3 | 10.1 | 16.8 | 47 | 10.1 | 16.1
5                                                10.3 | 26.5 | 19.4 | 7.1 | 5.8 | 31 | 6.7 | 28.2 | 25.5 | 10.1 | 5.4 | 24.2
6                                                                                        49.5 | 41.1 | 9.3 | 46 | 47.8 | 6.2
7  21.4 | 42.9 | 7.1 | 0 | 7.1 | 0 | 7.1 | 7.1 | 0 | 0 | 7.1 | 10.7 | 39.3 | 7.1 | 0 | 14.3 | 3.6 | 17.9 | 0 | 3.6 | 3.6 | 0
8                                                                                                    3.2 | 96.8 | 0.7 | 99.3
9                                                                                                   8.4 | 91.6 | 44.3 | 55.7
10                                                                               91 | 7.7 | 1.3 | 0 | 56.4 | 33.6 | 10.1 | 0
11                                                                                                 40.6 | 59.4 | 69.1 | 30.9
12                                                                                                  2.6 | 97.4 | 34.2 | 65.8
13                                                                                                   3.9 | 96.1 | 1.3 | 98.7
14                                                                                                       3.9 | 96.1 | 4 | 96
15                                                                                   16.8 | 69.7 | 13.5 | 21.5 | 67.8 | 10.7
desc_table <- lapply(vars, function(v) {
  
  PCOS_Data_Scored |>
    count(Level = .data[[v]]) |>
    mutate(
      Variable = v,
      pct = round(100 * n / sum(n), 1),
      `n (%)` = paste0(n, " (", pct, "%)")
    ) |>
    select(Variable, Level, `n (%)`)
  
}) |>
  bind_rows()

desc_table
                         Variable                   Level       n (%)
1                       Education            No Education    1 (0.3%)
2                       Education                 Primary    1 (0.3%)
3                       Education               Secondary 101 (33.2%)
4                       Education                Tertiary 187 (61.5%)
5                       Education         Post-Vocational   14 (4.6%)
6                         Smoking                     Yes    82 (27%)
7                         Smoking                      No   222 (73%)
8                         Alcohol                     Yes 271 (89.1%)
9                         Alcohol                      No  31 (10.2%)
10                        Alcohol                    <NA>    2 (0.7%)
11               Exercise_per_day            < 10 minutes  43 (14.1%)
12               Exercise_per_day           10-19 minutes  55 (18.1%)
13               Exercise_per_day           10-39 minutes 121 (39.8%)
14               Exercise_per_day           40-59 minutes   28 (9.2%)
15               Exercise_per_day            > 60 minutes  57 (18.8%)
16              Exercise_per_week                   1 day   26 (8.6%)
17              Exercise_per_week                  2 days  83 (27.3%)
18              Exercise_per_week                  3 days  68 (22.4%)
19              Exercise_per_week                  4 days   26 (8.6%)
20              Exercise_per_week                 >5 days   17 (5.6%)
21              Exercise_per_week                    none  84 (27.6%)
22               Type_of_exercise                    Mild 105 (34.5%)
23               Type_of_exercise                Moderate  98 (32.2%)
24               Type_of_exercise               Strenuous   17 (5.6%)
25               Type_of_exercise                    <NA>  84 (27.6%)
26                     Conditions                     T2D      6 (2%)
27                     Conditions                     H2P   17 (5.6%)
28                     Conditions              Heart Cond      3 (1%)
29                     Conditions            Constipation    5 (1.6%)
30                     Conditions                  Cancer    1 (0.3%)
31                     Conditions               T2D & H2P      6 (2%)
32                     Conditions        T2D,H2P,& Cancer    1 (0.3%)
33                     Conditions T2D,H2P,Heart, & Cancer    1 (0.3%)
34                     Conditions      T2D & Constipation    1 (0.3%)
35                     Conditions             T2D & Heart    1 (0.3%)
36                     Conditions                    <NA> 262 (86.2%)
37                   Hysterectomy                     Yes      6 (2%)
38                   Hysterectomy                      No   298 (98%)
39                  Contraceptive                     Yes    79 (26%)
40                  Contraceptive                      No   225 (74%)
41                 Alopecia_stage             no alopecia   225 (74%)
42                 Alopecia_stage                 Stage 1  62 (20.4%)
43                 Alopecia_stage                 Stage 2   17 (5.6%)
44                           Acne                     Yes 166 (54.6%)
45                           Acne                      No 138 (45.4%)
46 Insulin_resistance_medications                     Yes  55 (18.1%)
47 Insulin_resistance_medications                      No 249 (81.9%)
48                      Menopause                     Yes    8 (2.6%)
49                      Menopause                      No 296 (97.4%)
50                 Stopped_period                     Yes   12 (3.9%)
51                 Stopped_period                      No 292 (96.1%)
52                           Race                 African  58 (19.1%)
53                           Race             East Indian 209 (68.8%)
54                           Race                   Mixed  37 (12.2%)
table(PCOS_Data_Scored$PCOS_Status)

Non-PCOS     PCOS 
     155      149 
table(PCOS_Data_Scored$PCOS_Status)

Non-PCOS     PCOS 
     155      149 
table(PCOS_Data_Scored$PCOS_Status,PCOS_Data_Scored$Acne)
          
           Yes  No
  Non-PCOS  63  92
  PCOS     103  46
table(PCOS_Data_Scored$Race,PCOS_Data_Scored$PCOS_Status)
             
              Non-PCOS PCOS
  African           26   32
  East Indian      108  101
  Mixed             21   16
library(gt)


vars <- c(
  "Education", "Smoking", "Alcohol", "Exercise_per_day",
  "Exercise_per_week", "Type_of_exercise", "Conditions",
  "Hysterectomy", "Contraceptive", "Alopecia_stage",
  "Acne", "Insulin_resistance_medications",
  "Menopause", "Stopped_period", "Race"
)

group_var <- "PCOS_Status"


var_labels <- c(
  Education = "Education Level",
  Smoking = "Smoking Status",
  Alcohol = "Alcohol Use",
  Exercise_per_day = "Exercise (per day)",
  Exercise_per_week = "Exercise (per week)",
  Type_of_exercise = "Type of Exercise",
  Conditions = "Comorbid Conditions",
  Hysterectomy = "History of Hysterectomy",
  Contraceptive = "Contraceptive Use",
  Alopecia_stage = "Alopecia Stage",
  Acne = "Acne",
  Insulin_resistance_medications = "IR Medications",
  Menopause = "Menopause Status",
  Stopped_period = "Stopped Period",
  Race = "Race"
)

table1 <- map_dfr(vars, function(v) {
  
  tab <- table(PCOS_Data_Scored[[v]], PCOS_Data_Scored[[group_var]])
  
  test <- if (any(tab < 5)) {
    fisher.test(tab)
  } else {
    chisq.test(tab)
  }
  
  df <- as.data.frame(tab) |>
    rename(Level = Var1, Group = Var2, n = Freq) |>
    mutate(
      Level = as.character(Level),
      Group = as.character(Group)
    ) |>
    group_by(Group) |>
    mutate(
      pct = round(n / sum(n) * 100, 1),
      `n (%)` = paste0(n, " (", pct, "%)")
    ) |>
    ungroup()
  
  df_wide <- df |>
    select(Level, Group, `n (%)`) |>
    pivot_wider(names_from = Group, values_from = `n (%)`)
  
  df_wide |>
    mutate(
      Variable = v,
      p_value = test$p.value,
      .before = 1
    )
  
}) |>
  group_by(Variable) |>
  mutate(
    p_value = ifelse(row_number() == 1, p_value, NA)
  ) |>
  ungroup()

table1 <- table1 |>
  mutate(
    Variable = recode(Variable, !!!var_labels),
    
    # indent levels for readability
    Level = paste0("   ", Level),
    
    # format p-values
    p_value = case_when(
      is.na(p_value) ~ "",
      p_value < 0.001 ~ "<0.001",
      TRUE ~ sprintf("%.3f", p_value)
    )
  )


gt_table <- table1 |>
  gt() |>
  cols_label(
    Variable = "",
    Level = "",
    p_value = "p-value"
  ) |>
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      columns = Variable,
      rows = !duplicated(Variable)
    )
  ) |>
  cols_align(
    align = "center",
    -c(Variable, Level)
  )


gt_table
p-value Non-PCOS PCOS
Education Level 0.325 No Education 1 (0.6%) 0 (0%)
Education Level Primary 1 (0.6%) 0 (0%)
Education Level Secondary 45 (29%) 56 (37.6%)
Education Level Tertiary 101 (65.2%) 86 (57.7%)
Education Level Post-Vocational 7 (4.5%) 7 (4.7%)
Smoking Status 0.032 Yes 33 (21.3%) 49 (32.9%)
Smoking Status No 122 (78.7%) 100 (67.1%)
Alcohol Use 0.069 Yes 132 (86.3%) 139 (93.3%)
Alcohol Use No 21 (13.7%) 10 (6.7%)
Exercise (per day) 0.066 < 10 minutes 28 (18.1%) 15 (10.1%)
Exercise (per day) 10-19 minutes 30 (19.4%) 25 (16.8%)
Exercise (per day) 10-39 minutes 51 (32.9%) 70 (47%)
Exercise (per day) 40-59 minutes 13 (8.4%) 15 (10.1%)
Exercise (per day) > 60 minutes 33 (21.3%) 24 (16.1%)
Exercise (per week) 0.465 1 day 16 (10.3%) 10 (6.7%)
Exercise (per week) 2 days 41 (26.5%) 42 (28.2%)
Exercise (per week) 3 days 30 (19.4%) 38 (25.5%)
Exercise (per week) 4 days 11 (7.1%) 15 (10.1%)
Exercise (per week) >5 days 9 (5.8%) 8 (5.4%)
Exercise (per week) none 48 (31%) 36 (24.2%)
Type of Exercise 0.497 Mild 53 (49.5%) 52 (46%)
Type of Exercise Moderate 44 (41.1%) 54 (47.8%)
Type of Exercise Strenuous 10 (9.3%) 7 (6.2%)
Comorbid Conditions 0.737 T2D 3 (21.4%) 3 (10.7%)
Comorbid Conditions H2P 6 (42.9%) 11 (39.3%)
Comorbid Conditions Heart Cond 1 (7.1%) 2 (7.1%)
Comorbid Conditions PCOS 0 (0%) 0 (0%)
Comorbid Conditions Constipation 1 (7.1%) 4 (14.3%)
Comorbid Conditions Cancer 0 (0%) 1 (3.6%)
Comorbid Conditions T2D & H2P 1 (7.1%) 5 (17.9%)
Comorbid Conditions T2D,H2P,& Cancer 1 (7.1%) 0 (0%)
Comorbid Conditions T2D,H2P,Heart, & Cancer 0 (0%) 1 (3.6%)
Comorbid Conditions T2D & Constipation 0 (0%) 1 (3.6%)
Comorbid Conditions T2D & Heart 1 (7.1%) 0 (0%)
History of Hysterectomy 0.215 Yes 5 (3.2%) 1 (0.7%)
History of Hysterectomy No 150 (96.8%) 148 (99.3%)
Contraceptive Use <0.001 Yes 13 (8.4%) 66 (44.3%)
Contraceptive Use No 142 (91.6%) 83 (55.7%)
Alopecia Stage <0.001 no alopecia 141 (91%) 84 (56.4%)
Alopecia Stage Stage 1 12 (7.7%) 50 (33.6%)
Alopecia Stage Stage 2 2 (1.3%) 15 (10.1%)
Alopecia Stage Stage 3 0 (0%) 0 (0%)
Acne <0.001 Yes 63 (40.6%) 103 (69.1%)
Acne No 92 (59.4%) 46 (30.9%)
IR Medications <0.001 Yes 4 (2.6%) 51 (34.2%)
IR Medications No 151 (97.4%) 98 (65.8%)
Menopause Status 0.283 Yes 6 (3.9%) 2 (1.3%)
Menopause Status No 149 (96.1%) 147 (98.7%)
Stopped Period 1.000 Yes 6 (3.9%) 6 (4%)
Stopped Period No 149 (96.1%) 143 (96%)
Race 0.493 African 26 (16.8%) 32 (21.5%)
Race East Indian 108 (69.7%) 101 (67.8%)
Race Mixed 21 (13.5%) 16 (10.7%)
library(flextable)

Attaching package: 'flextable'
The following objects are masked from 'package:kableExtra':

    as_image, footnote
The following object is masked from 'package:purrr':

    compose
library(officer)

ft <- flextable(table1)

ft <- ft |>
  autofit() |>
  theme_booktabs() |>
  align(align = "center", part = "all") |>
  bold(i = ~ !duplicated(Variable), j = 1)  # bold variable names

doc <- read_docx() |>
  body_add_par("Table 1. Descriptive Characteristics by PCOS Status", style = "heading 1") |>
  body_add_flextable(ft)

print(doc, target = "Table1.docx")
cont_vars <- c(
  "Age", "Weight", "Info_Score", "Emo_Score", "Respect_Score",
  "Quality_Communication", "Negative_Communication",
  "Overall_Communication", "Trust_Score", "Hirsutism_score"
)

group_var <- "PCOS_Status"

var_labels <- c(
  Age = "Age",
  Weight = "Weight",
  Info_Score = "Information Score",
  Emo_Score = "Emotion Score",
  Respect_Score = "Respect Score",
  Quality_Communication = "Quality Communication",
  Negative_Communication = "Negative Communication",
  Overall_Communication = "Overall Communication",
  Trust_Score = "Trust Score",
  Hirsutism_score = "Hirsutism Score"
)

table2 <- map_dfr(cont_vars, function(v) {
  
  #
  test <- t.test(PCOS_Data_Scored[[v]] ~ PCOS_Data_Scored[[group_var]])
  
  # group summaries
  sum_stats <- PCOS_Data_Scored |>
    group_by(.data[[group_var]]) |>
    summarise(
      mean = mean(.data[[v]], na.rm = TRUE),
      sd = sd(.data[[v]], na.rm = TRUE),
      .groups = "drop"
    ) |>
    mutate(
      `Mean (SD)` = sprintf("%.2f (%.2f)", mean, sd)
    )
  

  wide <- sum_stats |>
    select(all_of(group_var), `Mean (SD)`) |>
    pivot_wider(names_from = all_of(group_var), values_from = `Mean (SD)`)
  
  wide |>
    mutate(
      Variable = v,
      p_value = test$p.value,
      .before = 1
    )
})

table2 <- table2 |>
  mutate(
    Variable = recode(Variable, !!!var_labels),
    
    p_value = case_when(
      p_value < 0.001 ~ "<0.001",
      TRUE ~ sprintf("%.3f", p_value)
    )
  )


ft <- flextable(table2)

ft <- ft |>
  autofit() |>
  theme_booktabs() |>
  align(align = "center", part = "all") |>
  bold(j = 1) |>
  set_header_labels(
    Variable = "",
    p_value = "p-value"
  )


doc <- read_docx() |>
  body_add_par("Table 2. Continuous Variables by PCOS Status", style = "heading 1") |>
  body_add_flextable(ft)

print(doc, target = "Table2_PCOS.docx")

Table with stats values:

library(dplyr)
library(purrr)
library(tidyr)
library(flextable)
library(officer)


cont_vars <- c(
  "Age", "Weight", "Info_Score", "Emo_Score", "Respect_Score",
  "Quality_Communication", "Negative_Communication",
  "Overall_Communication", "Trust_Score", "Hirsutism_score"
)

group_var <- "PCOS_Status"

var_labels <- c(
  Age = "Age",
  Weight = "Weight",
  Info_Score = "Information Score",
  Emo_Score = "Emotion Score",
  Respect_Score = "Respect Score",
  Quality_Communication = "Quality Communication",
  Negative_Communication = "Negative Communication",
  Overall_Communication = "Overall Communication",
  Trust_Score = "Trust Score",
  Hirsutism_score = "Hirsutism Score"
)

table2 <- map_dfr(cont_vars, function(v) {
 
  df <- PCOS_Data_Scored |>
    filter(!is.na(.data[[v]]), !is.na(.data[[group_var]])) |>
    mutate(group = as.factor(.data[[group_var]]))
  
  levels_group <- levels(df$group)

  test <- t.test(df[[v]] ~ df$group)
  
  # group summaries
  sum_stats <- df |>
    group_by(group) |>
    summarise(
      mean = mean(.data[[v]], na.rm = TRUE),
      sd = sd(.data[[v]], na.rm = TRUE),
      .groups = "drop"
    ) |>
    mutate(
      Mean_SD = sprintf("%.2f (%.2f)", mean, sd)
    )
  
  # reshape wide
  wide <- sum_stats |>
    select(group, Mean_SD) |>
    pivot_wider(names_from = group, values_from = Mean_SD)
  
  # mean difference (Group1 - Group2)
  est <- test$estimate
  mean_diff <- unname(est[1] - est[2])
  
  wide |>
    mutate(
      Variable = v,
      Mean_Diff = sprintf("%.2f", mean_diff),
      CI = sprintf("(%.2f, %.2f)", test$conf.int[1], test$conf.int[2]),
      t_value = sprintf("%.2f", unname(test$statistic)),
      p_value = case_when(
        is.na(test$p.value) ~ "",
        test$p.value < 0.001 ~ "<0.001",
        TRUE ~ sprintf("%.3f", test$p.value)
      ),
      .before = 1
    )
})


table2 <- table2 |>
  mutate(
    Variable = recode(Variable, !!!var_labels)
  )


ft <- flextable(table2)

ft <- ft |>
  theme_booktabs() |>
  autofit() |>
  align(align = "center", part = "all") |>
  bold(j = 1) |>
  set_header_labels(
    Variable = "",
    Mean_Diff = "Mean Difference",
    CI = "95% CI",
    t_value = "t",
    p_value = "p-value"
  )

doc <- read_docx() |>
  body_add_par("Table 2. Continuous Variables by PCOS Status", style = "heading 1") |>
  body_add_par("Values are mean (SD) unless otherwise indicated. Mean difference represents Group 1 minus Group 2.", style = "Normal") |>
  body_add_flextable(ft)

print(doc, target = "Table2_PCOS.docx")

table(PCOS_Data_Scored$PCOS_Status)

Non-PCOS     PCOS 
     155      149 
table1 <- map_dfr(vars, function(v) {
  
  tab <- table(PCOS_Data_Scored[[v]], PCOS_Data_Scored[[group_var]])
  
  use_fisher <- any(tab < 5)
  
  test <- if (use_fisher) {
    fisher.test(tab)
  } else {
    chisq.test(tab)
  }
  
  df <- as.data.frame(tab) |>
    rename(Level = Var1, Group = Var2, n = Freq) |>
    mutate(
      Level = as.character(Level),
      Group = as.character(Group)
    ) |>
    group_by(Group) |>
    mutate(
      pct = round(n / sum(n) * 100, 1),
      `n (%)` = paste0(n, " (", pct, "%)")
    ) |>
    ungroup()
  
  df_wide <- df |>
    select(Level, Group, `n (%)`) |>
    pivot_wider(names_from = Group, values_from = `n (%)`)
  
  df_wide |>
    mutate(
      Variable = v,
      test_type = ifelse(use_fisher, "Fisher", "χ²"),
      statistic = ifelse(use_fisher, NA, unname(test$statistic)),
      p_value = test$p.value,
      .before = 1
    )
  
}) |>
  group_by(Variable) |>
  mutate(
    p_value = ifelse(row_number() == 1, p_value, NA),
    test_type = ifelse(row_number() == 1, test_type, NA),
    statistic = ifelse(row_number() == 1, statistic, NA)
  ) |>
  ungroup()


table1 <- table1 |>
  mutate(
    Variable = recode(Variable, !!!var_labels),
    
    Level = paste0("   ", Level),
    
    p_value = case_when(
      is.na(p_value) ~ "",
      p_value < 0.001 ~ "<0.001",
      TRUE ~ sprintf("%.3f", p_value)
    ),
    
    statistic = case_when(
      is.na(statistic) ~ "",
      TRUE ~ sprintf("%.2f", statistic)
    )
  )


ft <- flextable(table1)

ft <- ft |>
  autofit() |>
  theme_booktabs() |>
  align(align = "center", part = "all") |>
  bold(i = ~ !duplicated(Variable), j = 1) |>
  set_header_labels(
    Variable = "",
    Level = "",
    test_type = "Test",
    statistic = "Statistic",
    p_value = "p-value"
  )


doc <- read_docx() |>
  body_add_par("Table 1. Descriptive Characteristics by PCOS Status", style = "heading 1") |>
  body_add_par("Values are n (%). χ² = Chi-square test; Fisher = Fisher’s exact test.", style = "Normal") |>
  body_add_flextable(ft)

print(doc, target = "Table1.docx")