Trinidad and Tabago Data

Measures:

PPEQ: Picker Patient Experience Questionnaire

PHCPC_F1: Patient-Health Care Provider Communication Scale - Factor 1

PHCPC_F2: Patient-Health Care Provider Communication Scale - Factor 2

HCRT: Health Care Relationship Trust Scale

Perceived Stigma Scale had no data

setwd("C:/Work Files/Collaborations/Kendall PCOS/T and T Project")

Packages Used

library(haven) #reading data from spss, sas, stata
library(tidyverse) #Data management

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library (corrplot) #correlation tables

corrplot 0.95 loaded

library(psych) ## basic psychometrics and statistics


Attaching package: 'psych'

The following objects are masked from 'package:ggplot2':

    %+%, alpha

library(finalfit) #Testing Assumptions
library (performance) #Testing Assumptions
library(MVN) #Assessing multivariate normality


Attaching package: 'MVN'

The following object is masked from 'package:psych':

    mardia

library(knitr) #Making Tables
library(kableExtra) #Making Tables


Attaching package: 'kableExtra'

The following object is masked from 'package:dplyr':

    group_rows

library(apaTables) #Generating APA formated tables
library(effectsize)


Attaching package: 'effectsize'

The following object is masked from 'package:psych':

    phi

library(mice)


Attaching package: 'mice'

The following object is masked from 'package:stats':

    filter

The following objects are masked from 'package:base':

    cbind, rbind

library(naniar)

Data Input

PCOS_Data <-read.csv("MBBS_Group_2_Data.csv")
PCOS_Data <-(PCOS_Data[1:304,])

table(PCOS_Data$PCOS_Status)


  0   1 
155 149

Data Management

PCOS_Data$PPEQ_11 <- as.numeric(PCOS_Data$PPEQ_11)

Warning: NAs introduced by coercion

PCOS_Data$Age <-as.numeric(PCOS_Data$Age)

PCOS_Data$Exercise_per_week <-factor(PCOS_Data$Exercise_per_week,
                                     levels = c(1,2,3,4,5,6),
                                     labels = c("1 day", "2 days", "3 days", "4 days", ">5 days", "none"))

PCOS_Data$Type_of_exercise <- factor(PCOS_Data$Type_of_exercise,
                                     levels = c(1,2,3),
                                     labels = c("Mild", "Moderate", "Strenuous"))

PCOS_Data$Hysterectomy <-factor(PCOS_Data$Hysterectomy,
                                levels = c(1,2),
                                labels = c("Yes", "No"))

PCOS_Data$Education <- factor(PCOS_Data$Education,
                              levels = c(1,2,3,4,5),
                              labels = c("No Education","Primary","Secondary","Tertiary", "Post-Vocational"))

PCOS_Data$Smoking <- factor(PCOS_Data$Smoking,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Alcohol <- factor(PCOS_Data$Alcohol,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Contraceptive <- factor(PCOS_Data$Contraceptive,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Menopause <- factor(PCOS_Data$Menopause,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Acne <- factor(PCOS_Data$Acne,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Stopped_period <- factor(PCOS_Data$Stopped_period,
                            levels = c(1,2),
                            labels = c("Yes", "No"))

PCOS_Data$Alopecia_stage <- factor(PCOS_Data$Alopecia_stage,
                                   levels = c(0,1,2,3),
                                   labels = c("no alopecia", "Stage 1", "Stage 2", "Stage 3"))


PCOS_Data$Insulin_resistance_medications <- factor(PCOS_Data$Insulin_resistance_medications,
                            levels = c(1,2),
                            labels = c("Yes", "No"))                                    

PCOS_Data$Conditions <- factor(PCOS_Data$Conditions,
                               levels = c(1,2,3,4,5,6,7,8,9,10,11,12),
                               labels = c("T2D","H2P","Heart Cond","PCOS","Constipation","Cancer","T2D & H2P", "T2D,H2P,& Cancer", "T2D,H2P,Heart, & Cancer","T2D & Constipation","T2D & Heart","T2D & H2P"))

PCOS_Data$PCOS_Status <- factor(PCOS_Data$PCOS_Status,
                                levels = c(0,1),
                                labels = c("Non-PCOS","PCOS"))

PCOS_Data$Race <- factor(PCOS_Data$Race,
                         levels = c(1,2,3),
                         labels = c("African","East Indian","Mixed"))

PCOS_Data$Exercise_per_day <- factor(PCOS_Data$Exercise_per_day,
                                            levels = c(1,2,3,4,5),
                                            labels = c("< 10 minutes","10-19 minutes", "10-39 minutes","40-59 minutes","> 60 minutes"))

PPEQ Scoring

###INFO_Score


items <- c("PPEQ_1","PPEQ_2","PPEQ_14","PPEQ_15","PPEQ_16")

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(items), ~ ifelse(. == 4, NA, .)))

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(items), ~ 4 - ., .names = "{.col}_r"))

info_items_r <- c("PPEQ_1_r","PPEQ_2_r","PPEQ_14_r","PPEQ_15_r","PPEQ_16_r")

PCOS_Data <- PCOS_Data |>
  mutate(
    Info_Score = ifelse(
      rowSums(!is.na(pick(all_of(info_items_r)))) >= 3,
      rowMeans(pick(all_of(info_items_r)), na.rm = TRUE),
      NA
    )
  )

#### Emotion_Score

emo_items <- c("PPEQ_4","PPEQ_8","PPEQ_9")

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(emo_items), ~ ifelse(. == 4, NA, .)))

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(emo_items), ~ 4 - ., .names = "{.col}_r"))

emo_items_r <- c("PPEQ_4_r","PPEQ_8_r","PPEQ_9_r")

PCOS_Data <- PCOS_Data |>
  mutate(
    Emo_Score = ifelse(
      rowSums(!is.na(pick(all_of(emo_items_r)))) >= 2,
      rowMeans(pick(all_of(emo_items_r)), na.rm = TRUE),
      NA
    )
  )

#### Respect Score

respect_items <- c("PPEQ_5", "PPEQ_7")

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(respect_items), ~ ifelse(. == 4, NA, .)))

PCOS_Data <- PCOS_Data |>
  mutate(
    PPEQ_5_r = 4 - PPEQ_5,
      )

PCOS_Data <- PCOS_Data |>
  mutate(
    Respect_Score = ifelse(
      rowSums(!is.na(pick(PPEQ_5_r, PPEQ_7))) >= 1,
      rowMeans(pick(PPEQ_5_r, PPEQ_7), na.rm = TRUE),
      NA
    )
  )

Patient Healthcare Provider Communication Scale

qc_items <- paste0("PHCPC_F1_", 1:17)

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(qc_items), as.numeric))

PCOS_Data <- PCOS_Data |>
  mutate(
    Quality_Communication = rowSums(
      pick(all_of(qc_items)),
      na.rm = TRUE
    )
  )

nc_items <- paste0("PHCPC_F2_", 1:4)

PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(nc_items), as.numeric))

PCOS_Data <- PCOS_Data |>
  mutate(
    Negative_Communication = rowSums(
      pick(all_of(nc_items)),
      na.rm = TRUE
    )
  )

### Overall Communication

range(PCOS_Data$Negative_Communication, na.rm = TRUE)

[1]  4 16

PCOS_Data <- PCOS_Data |>
  mutate(
    Negative_Communication_r = (16 + 4) - Negative_Communication
  )

PCOS_Data <- PCOS_Data |>
  mutate(
    Overall_Communication = Quality_Communication + Negative_Communication_r
  )

Health Care Provider Trust Scale

neg_items <- c("HCRT_1", "HCRT_11", "HCRT_13")



PCOS_Data <- PCOS_Data |>
  mutate(across(all_of(neg_items), ~ 4 - ., .names = "{.col}_r"))

trust_items <- c(
  "HCRT_1_r",
  "HCRT_2", "HCRT_3", "HCRT_4", "HCRT_5",
  "HCRT_6", "HCRT_7", "HCRT_8", "HCRT_9",
  "HCRT_10", "HCRT_11_r", "HCRT_12", "HCRT_13_r",
  "HCRT_14"
)

PCOS_Data <- PCOS_Data |>
  mutate(
    trust_valid = rowSums(!is.na(pick(all_of(trust_items)))),
    Trust_Score = ifelse(
      trust_valid >= 10,
      rowMeans(pick(all_of(trust_items)), na.rm = TRUE),
      NA
    )
  ) |>
  select(-trust_valid)

New Data Set with Scored Scales

PCOS_Data_Scored <-write.csv(PCOS_Data,"PCOS_Data_Scored.csv")
PCOS_Data_Scored <-write_sav(PCOS_Data,"PCOS_Data_Scored.sav")

Descriptive Statistics

describe(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)])

                       vars   n   mean    sd median trimmed   mad    min max
Height                    1 303 161.53 10.10 162.60  162.29  7.41 120.00 190
Weight                    2 303  69.15 15.46  67.00   67.85 11.86   6.00 143
Hirsutism_score           3 304   5.80  6.16   3.00    5.02  4.45   0.00  26
Age                       4 304  29.07  7.39  28.00   28.55  7.41  18.00  45
Emo_Score                 5 294   1.97  0.50   2.00    1.96  0.49   1.00   3
Respect_Score             6 304   1.62  0.53   1.50    1.58  0.74   1.00   3
Quality_Communication     7 304  47.23 11.82  47.00   47.62 11.86  17.00  68
Negative_Communication    8 304   7.26  3.06   6.50    6.82  2.22   4.00  16
Overall_Communication     9 304  59.96 14.02  61.00   60.77 13.34  21.00  84
Trust_Score              10 304   2.61  0.84   2.71    2.67  0.85   0.21   4
                        range  skew kurtosis   se
Height                  70.00 -1.19     3.73 0.58
Weight                 137.00  1.12     3.99 0.89
Hirsutism_score         26.00  0.83    -0.38 0.35
Age                     27.00  0.52    -0.79 0.42
Emo_Score                2.00  0.16    -0.40 0.03
Respect_Score            2.00  0.36    -0.68 0.03
Quality_Communication   51.00 -0.29    -0.46 0.68
Negative_Communication  12.00  1.13     0.78 0.18
Overall_Communication   63.00 -0.48    -0.19 0.80
Trust_Score              3.79 -0.57     0.08 0.05

describe.by(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)],PCOS_Data_Scored$PCOS_Status)

Warning in describe.by(PCOS_Data_Scored[, c(2, 3, 18, 20, 82, 84, 85, 86, :
describe.by is deprecated.  Please use the describeBy function


 Descriptive statistics by group 
group: Non-PCOS
                       vars   n   mean    sd median trimmed   mad    min max
Height                    1 154 160.83 10.55 162.00  161.80  7.04 120.00 190
Weight                    2 155  64.74 11.05  63.00   63.93  8.90  44.50 125
Hirsutism_score           3 155   3.62  4.95   1.00    2.83  1.48   0.00  18
Age                       4 155  29.45  7.55  29.00   28.90  8.90  18.00  45
Emo_Score                 5 146   2.03  0.46   2.00    2.02  0.49   1.00   3
Respect_Score             6 155   1.65  0.57   1.50    1.60  0.74   1.00   3
Quality_Communication     7 155  47.17 12.42  48.00   47.74 11.86  17.00  68
Negative_Communication    8 155   7.14  3.30   6.00    6.58  2.97   4.00  16
Overall_Communication     9 155  60.03 15.00  61.00   61.14 13.34  21.00  84
Trust_Score              10 155   2.58  0.93   2.71    2.66  0.85   0.21   4
                       range  skew kurtosis   se
Height                 70.00 -1.16     3.23 0.85
Weight                 80.50  1.35     4.65 0.89
Hirsutism_score        18.00  1.14    -0.23 0.40
Age                    27.00  0.52    -0.79 0.61
Emo_Score               2.00  0.37    -0.35 0.04
Respect_Score           2.00  0.45    -0.63 0.05
Quality_Communication  51.00 -0.42    -0.40 1.00
Negative_Communication 12.00  1.27     0.87 0.27
Overall_Communication  63.00 -0.65    -0.12 1.20
Trust_Score             3.79 -0.67    -0.15 0.07
------------------------------------------------------------ 
group: PCOS
                       vars   n   mean    sd median trimmed   mad    min   max
Height                    1 149 162.24  9.59 163.00  162.78  7.41 121.00 190.0
Weight                    2 148  73.78 17.91  72.40   72.76 14.68   6.00 143.0
Hirsutism_score           3 149   8.07  6.48   7.00    7.60  8.90   0.00  26.0
Age                       4 149  28.68  7.23  27.00   28.22  7.41  18.00  45.0
Emo_Score                 5 148   1.91  0.53   2.00    1.90  0.49   1.00   3.0
Respect_Score             6 149   1.59  0.48   1.50    1.57  0.74   1.00   2.5
Quality_Communication     7 149  47.28 11.21  47.00   47.45 11.86  20.00  68.0
Negative_Communication    8 149   7.39  2.78   7.00    7.09  2.97   4.00  16.0
Overall_Communication     9 149  59.89 12.98  59.00   60.22 13.34  29.00  84.0
Trust_Score              10 149   2.64  0.74   2.71    2.66  0.74   0.36   4.0
                        range  skew kurtosis   se
Height                  69.00 -1.17     4.21 0.79
Weight                 137.00  0.67     2.87 1.47
Hirsutism_score         26.00  0.49    -0.76 0.53
Age                     27.00  0.50    -0.87 0.59
Emo_Score                2.00  0.11    -0.61 0.04
Respect_Score            1.50  0.11    -1.20 0.04
Quality_Communication   48.00 -0.09    -0.67 0.92
Negative_Communication  12.00  0.91     0.42 0.23
Overall_Communication   55.00 -0.21    -0.52 1.06
Trust_Score              3.64 -0.27    -0.04 0.06

PCOS_Data_Scored_No_NA <- na.omit(PCOS_Data_Scored)

apa.cor.table(PCOS_Data_Scored[,c(2,3,18,20,82,84,85,86,88,92)])



Means, standard deviations, and correlations with confidence intervals
 

  Variable                  M      SD    1            2            3           
  1. Height                 161.53 10.10                                       
                                                                               
  2. Weight                 69.15  15.46 .27**                                 
                                         [.16, .37]                            
                                                                               
  3. Hirsutism_score        5.80   6.16  .05          .19**                    
                                         [-.06, .17]  [.08, .30]               
                                                                               
  4. Age                    29.07  7.39  .06          .16**        -.02        
                                         [-.05, .17]  [.05, .27]   [-.14, .09] 
                                                                               
  5. Emo_Score              1.97   0.50  .14*         -.13*        .15*        
                                         [.02, .25]   [-.24, -.01] [.03, .26]  
                                                                               
  6. Respect_Score          1.62   0.53  -.11         -.05         -.05        
                                         [-.22, .00]  [-.17, .06]  [-.16, .06] 
                                                                               
  7. Quality_Communication  47.23  11.82 .17**        -.04         .11*        
                                         [.06, .28]   [-.15, .07]  [.00, .22]  
                                                                               
  8. Negative_Communication 7.26   3.06  -.13*        .04          -.16**      
                                         [-.24, -.02] [-.08, .15]  [-.27, -.05]
                                                                               
  9. Overall_Communication  59.96  14.02 .18**        -.04         .13*        
                                         [.06, .28]   [-.15, .07]  [.02, .24]  
                                                                               
  10. Trust_Score           2.61   0.84  .15**        -.04         .19**       
                                         [.04, .26]   [-.15, .08]  [.08, .30]  
                                                                               
  4           5            6            7            8            9         
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
                                                                            
  .03                                                                       
  [-.09, .14]                                                               
                                                                            
  .15**       -.21**                                                        
  [.04, .26]  [-.32, -.10]                                                  
                                                                            
  -.08        .41**        -.54**                                           
  [-.19, .04] [.31, .50]   [-.62, -.46]                                     
                                                                            
  .15**       -.23**       .42**        -.66**                              
  [.04, .26]  [-.34, -.12] [.32, .51]   [-.72, -.59]                        
                                                                            
  -.10        .40**        -.55**       .99**        -.77**                 
  [-.21, .02] [.29, .49]   [-.62, -.46] [.98, .99]   [-.81, -.72]           
                                                                            
  -.11        .34**        -.54**       .85**        -.72**       .88**     
  [-.22, .00] [.23, .44]   [-.62, -.46] [.82, .88]   [-.77, -.66] [.85, .90]
                                                                            

Note. M and SD are used to represent mean and standard deviation, respectively.
Values in square brackets indicate the 95% confidence interval.
The confidence interval is a plausible range of population correlations 
that could have caused the sample correlation (Cumming, 2014).
 * indicates p < .05. ** indicates p < .01.

res <- cor(PCOS_Data_Scored_No_NA[,c(2,3,18,20,82,84,85,86,88,92)])
round(res, 2)

                       Height Weight Hirsutism_score   Age Emo_Score
Height                   1.00  -0.27            0.10  0.22      0.17
Weight                  -0.27   1.00            0.16  0.19     -0.24
Hirsutism_score          0.10   0.16            1.00  0.12      0.17
Age                      0.22   0.19            0.12  1.00     -0.20
Emo_Score                0.17  -0.24            0.17 -0.20      1.00
Respect_Score            0.35  -0.48            0.33  0.41     -0.01
Quality_Communication   -0.11   0.15            0.13  0.05      0.37
Negative_Communication   0.17  -0.04           -0.30  0.16     -0.35
Overall_Communication   -0.15   0.16            0.21  0.01      0.46
Trust_Score              0.01   0.01            0.10 -0.14      0.50
                       Respect_Score Quality_Communication
Height                          0.35                 -0.11
Weight                         -0.48                  0.15
Hirsutism_score                 0.33                  0.13
Age                             0.41                  0.05
Emo_Score                      -0.01                  0.37
Respect_Score                   1.00                 -0.12
Quality_Communication          -0.12                  1.00
Negative_Communication          0.09                  0.03
Overall_Communication          -0.14                  0.96
Trust_Score                     0.01                  0.70
                       Negative_Communication Overall_Communication Trust_Score
Height                                   0.17                 -0.15        0.01
Weight                                  -0.04                  0.16        0.01
Hirsutism_score                         -0.30                  0.21        0.10
Age                                      0.16                  0.01       -0.14
Emo_Score                               -0.35                  0.46        0.50
Respect_Score                            0.09                 -0.14        0.01
Quality_Communication                    0.03                  0.96        0.70
Negative_Communication                   1.00                 -0.24       -0.19
Overall_Communication                   -0.24                  1.00        0.74
Trust_Score                             -0.19                  0.74        1.00

corrplot(res,
         type = "lower",
         order = "hclust",
         tl.col = "black",
         tl.srt = 45,
         addCoef.col = "black",
         sig.level = 0.05,
         insig = "blank")

png("correlation_plot.png", width = 2000, height = 2000, res = 300)

corrplot(res,
         type = "lower",
         order = "hclust",
         tl.col = "black",
         tl.srt = 45,
         addCoef.col = "black",
         number.cex = 0.6,
        
         insig = "blank")

dev.off()

png 
  2

describe(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)])%>%
  knitr::kable(digits = 3, format="html", booktabs=TRUE, caption="Table 1. Descriptives")%>%
  kable_classic(full_width = F, html_font = "Cambria")

Table 1. Descriptives
	vars	n	mean	sd	median	trimmed	mad	min	max	range	skew	kurtosis	se
Height	1	303	161.526	10.099	162.600	162.291	7.413	120.000	190	70.000	-1.185	3.731	0.580
Weight	2	303	69.154	15.456	67.000	67.854	11.861	6.000	143	137.000	1.121	3.991	0.888
Hirsutism_score	3	304	5.803	6.161	3.000	5.025	4.448	0.000	26	26.000	0.831	-0.376	0.353
Age	4	304	29.069	7.395	28.000	28.549	7.413	18.000	45	27.000	0.516	-0.794	0.424
Emo_Score	5	294	1.970	0.499	2.000	1.963	0.494	1.000	3	2.000	0.162	-0.400	0.029
Respect_Score	6	304	1.625	0.532	1.500	1.580	0.741	1.000	3	2.000	0.361	-0.679	0.031
Quality_Communication	7	304	47.227	11.821	47.000	47.623	11.861	17.000	68	51.000	-0.290	-0.462	0.678
Negative_Communication	8	304	7.263	3.057	6.500	6.824	2.224	4.000	16	12.000	1.131	0.782	0.175
Overall_Communication	9	304	59.964	14.022	61.000	60.770	13.343	21.000	84	63.000	-0.483	-0.189	0.804
Trust_Score	10	304	2.611	0.841	2.714	2.665	0.847	0.214	4	3.786	-0.569	0.081	0.048

random <- rnorm(nrow(PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)]), 7)
#The command above generates a random variable with the same number of rows (values)as the dataset
hist(random)#just to check the distribtuion of this new variable

fakereg <-lm(random ~., data = PCOS_Data_Scored [,c(2,3,18,20,82,84,85,86,88,92)])
##runs a regression with the new random variable as the dv and all the variables in the dataset as IVs
##This generates a set of residuals in order to check the assumptions

##The following set of code just scales the residuals
standardized <- rstudent(fakereg)
fitted <- scale(fakereg$fitted.values)
hist(fitted)

check_model (fakereg)

MVN::mvn(
  PCOS_Data_Scored_No_NA[, c(2,3,18,20,82,84,85,86,88,92)]
)

$multivariate_normality
           Test Statistic p.value     Method          MVN
1 Henze-Zirkler     1.006   0.002 asymptotic ✗ Not normal

$univariate_normality
               Test               Variable Statistic p.value    Normality
1  Anderson-Darling                 Height     0.297    0.56     ✓ Normal
2  Anderson-Darling                 Weight     1.521  <0.001 ✗ Not normal
3  Anderson-Darling        Hirsutism_score     1.014   0.009 ✗ Not normal
4  Anderson-Darling                    Age     1.163   0.004 ✗ Not normal
5  Anderson-Darling              Emo_Score     0.779   0.036 ✗ Not normal
6  Anderson-Darling          Respect_Score     1.058   0.007 ✗ Not normal
7  Anderson-Darling  Quality_Communication     0.408   0.318     ✓ Normal
8  Anderson-Darling Negative_Communication     0.982   0.011 ✗ Not normal
9  Anderson-Darling  Overall_Communication     0.281   0.605     ✓ Normal
10 Anderson-Darling            Trust_Score     0.565   0.126     ✓ Normal

$descriptives
                 Variable  n    Mean Std.Dev  Median     Min     Max    25th
1                  Height 22 165.864   5.676 165.000 157.000 178.000 162.250
2                  Weight 22  74.286  12.689  71.900  62.000 113.600  66.250
3         Hirsutism_score 22   7.636   5.551   9.000   0.000  15.000   2.250
4                     Age 22  34.682   8.191  39.000  19.000  45.000  29.250
5               Emo_Score 22   2.197   0.456   2.000   1.333   3.000   2.000
6           Respect_Score 22   1.705   0.549   1.750   1.000   2.500   1.125
7   Quality_Communication 22  52.727   7.598  52.000  40.000  68.000  45.500
8  Negative_Communication 22   6.591   2.153   6.000   4.000  12.000   5.000
9   Overall_Communication 22  66.136   7.827  64.500  53.000  84.000  59.500
10            Trust_Score 22   2.981   0.555   2.857   2.000   3.786   2.589
      75th   Skew Kurtosis
1  170.000  0.334    2.465
2   77.750  1.714    5.562
3   12.750 -0.266    1.489
4   41.000 -0.697    2.093
5    2.333  0.319    2.481
6    2.000  0.016    1.725
7   57.000  0.256    2.366
8    7.750  1.045    3.356
9   72.000  0.311    2.478
10   3.554  0.047    1.795

$data
    Height Weight Hirsutism_score Age Emo_Score Respect_Score
26     163   63.0               3  19  2.666667           1.0
93     170   67.0               0  21  3.000000           1.0
106    172   68.0               0  37  1.666667           2.0
110    168   63.0               6  39  2.000000           1.5
116    163   78.0               0  39  2.333333           1.5
118    165   78.0              13  42  2.000000           1.5
119    164   68.0               7  41  1.333333           2.5
129    162   78.0              11  32  2.000000           2.0
137    178   66.0               9  41  2.333333           2.5
138    160   64.0              12  39  2.333333           2.5
139    170   72.0              11  42  2.666667           2.5
140    167   77.0              13  30  3.000000           2.0
148    170   62.0              12  40  3.000000           2.0
153    160   72.0              14  24  2.000000           2.0
155    169   77.0              15  42  2.333333           1.5
169    176   67.0              14  27  2.000000           2.0
221    160   72.7               2  20  1.666667           1.0
223    165   71.8               0  45  2.000000           2.0
224    170  113.6               9  42  1.666667           1.0
229    157   95.9              13  29  2.333333           1.0
246    163   65.0               0  32  2.000000           1.5
256    157   95.3               4  40  2.000000           1.0
    Quality_Communication Negative_Communication Overall_Communication
26                     61                      6                    75
93                     44                      6                    58
106                    51                      5                    66
110                    47                      6                    61
116                    51                      6                    65
118                    51                      7                    64
119                    40                      7                    53
129                    44                      5                    59
137                    53                      9                    64
138                    56                      5                    71
139                    57                      5                    72
140                    59                      5                    74
148                    56                      4                    72
153                    51                      8                    63
155                    58                      5                    73
169                    44                      9                    55
221                    44                      5                    59
223                    56                     12                    64
224                    57                      8                    69
229                    68                      4                    84
246                    67                     11                    76
256                    45                      7                    58
    Trust_Score
26     3.785714
93     2.571429
106    2.857143
110    2.642857
116    2.928571
118    2.857143
119    2.142857
129    2.714286
137    3.142857
138    3.500000
139    3.571429
140    3.785714
148    3.642857
153    2.428571
155    2.000000
169    2.714286
221    2.714286
223    2.571429
224    3.357143
229    3.714286
246    3.571429
256    2.357143

$subset
NULL

$outlierMethod
[1] "none"

attr(,"class")
[1] "mvn"

vis_miss(PCOS_Data_Scored)

gg_miss_var(PCOS_Data_Scored)

md.pattern(PCOS_Data_Scored[,c(2,3,18,20,82,84,85,86,88,92)])

    Hirsutism_score Age Respect_Score Quality_Communication
292               1   1             1                     1
10                1   1             1                     1
1                 1   1             1                     1
1                 1   1             1                     1
                  0   0             0                     0
    Negative_Communication Overall_Communication Trust_Score Height Weight
292                      1                     1           1      1      1
10                       1                     1           1      1      1
1                        1                     1           1      1      0
1                        1                     1           1      0      1
                         0                     0           0      1      1
    Emo_Score   
292         1  0
10          0  1
1           1  1
1           1  1
           10 12

Analyses

### Age 

str(PCOS_Data$PCOS_Status)

 Factor w/ 2 levels "Non-PCOS","PCOS": 2 2 2 2 2 2 2 2 2 2 ...

Age_model <- aov(Age ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Age ~ PCOS_Status, data = PCOS_Data_Scored)


    Welch Two Sample t-test

data:  Age by PCOS_Status
t = 0.9049, df = 302, p-value = 0.3662
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -0.9013285  2.4359464
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              29.44516               28.67785

summary(Age_model)

             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1     45   44.73   0.817  0.367
Residuals   302  16525   54.72

cohens_d(
  Age ~ PCOS_Status,
  data = PCOS_Data_Scored
)

Cohen's d |        95% CI
-------------------------
0.10      | [-0.12, 0.33]

- Estimated using pooled SD.

PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_age = mean(Age, na.rm = TRUE),
    sd_age = sd(Age, na.rm = TRUE)
  )

# A tibble: 2 × 4
  PCOS_Status     n mean_age sd_age
  <fct>       <int>    <dbl>  <dbl>
1 Non-PCOS      155     29.4   7.55
2 PCOS          149     28.7   7.23

ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Age)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  labs(
    title = "Age by PCOS Status",
    x = "PCOS Status",
    y = "Age"
  )

### Weight 

Weight_model <- aov(Weight ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Weight ~ PCOS_Status, data = PCOS_Data_Scored)


    Welch Two Sample t-test

data:  Weight by PCOS_Status
t = -5.2563, df = 242.71, p-value = 3.22e-07
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -12.422722  -5.650041
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              64.74065               73.77703

summary(Weight_model)

             Df Sum Sq Mean Sq F value   Pr(>F)    
PCOS_Status   1   6182    6182   28.21 2.12e-07 ***
Residuals   301  65965     219                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
1 observation deleted due to missingness

cohens_d(
  Weight ~ PCOS_Status,
  data = PCOS_Data_Scored
)

Warning: Missing values detected. NAs dropped.

Cohen's d |         95% CI
--------------------------
-0.61     | [-0.84, -0.38]

- Estimated using pooled SD.

PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Weight = mean(Weight, na.rm = TRUE),
    sd_Weight = sd(Weight, na.rm = TRUE)
  )

# A tibble: 2 × 4
  PCOS_Status     n mean_Weight sd_Weight
  <fct>       <int>       <dbl>     <dbl>
1 Non-PCOS      155        64.7      11.1
2 PCOS          149        73.8      17.9

plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Weight), is.finite(Weight), !is.na(PCOS_Status))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Weight)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  labs(
    title = "Weight by PCOS Status",
    x = "PCOS Status",
    y = "Weight"
  )

Warning: Removed 1 row containing non-finite outside the scale range
(`stat_boxplot()`).

Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

#### PPEQ Info Score


Info_Score_model <- aov(Info_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Info_Score ~ PCOS_Status, data = PCOS_Data_Scored)


    Welch Two Sample t-test

data:  Info_Score by PCOS_Status
t = -0.030664, df = 294.03, p-value = 0.9756
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -0.1169905  0.1134007
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              2.022478               2.024273

cohens_d(
 Info_Score ~ PCOS_Status,
  data = PCOS_Data_Scored
)

Warning: Missing values detected. NAs dropped.

Cohen's d |        95% CI
-------------------------
-3.53e-03 | [-0.23, 0.22]

- Estimated using pooled SD.

summary(Info_Score_model)

             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1   0.00 0.00024   0.001  0.976
Residuals   299  77.31 0.25856               
3 observations deleted due to missingness

PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Info_Score = mean(Info_Score, na.rm = TRUE),
    sd_Info_Score = sd(Info_Score, na.rm = TRUE)
  )

# A tibble: 2 × 4
  PCOS_Status     n mean_Info_Score sd_Info_Score
  <fct>       <int>           <dbl>         <dbl>
1 Non-PCOS      155            2.02         0.545
2 PCOS          149            2.02         0.469

plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Info_Score), is.finite(Info_Score), !is.na(Info_Score))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Info_Score)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  labs(
    title = "Info_Score by PCOS Status",
    x = "PCOS Status",
    y = "Info_Score"
  )

Warning: Removed 3 rows containing non-finite outside the scale range
(`stat_boxplot()`).

Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_point()`).

#### PPEQ Emotion Score


Emo_Score_model <- aov(Emo_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Emo_Score ~ PCOS_Status, data = PCOS_Data_Scored)


    Welch Two Sample t-test

data:  Emo_Score by PCOS_Status
t = 2.2102, df = 288.11, p-value = 0.02788
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 0.0139822 0.2414479
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              2.034247               1.906532

cohens_d(
  Emo_Score ~ PCOS_Status,
  data = PCOS_Data_Scored
)

Warning: Missing values detected. NAs dropped.

Cohen's d |       95% CI
------------------------
0.26      | [0.03, 0.49]

- Estimated using pooled SD.

summary(Emo_Score_model)

             Df Sum Sq Mean Sq F value Pr(>F)  
PCOS_Status   1   1.20  1.1988   4.876  0.028 *
Residuals   292  71.79  0.2458                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
10 observations deleted due to missingness

PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Emo_Score = mean(Emo_Score, na.rm = TRUE),
    sd_Emo_Score = sd(Emo_Score, na.rm = TRUE)
  )

# A tibble: 2 × 4
  PCOS_Status     n mean_Emo_Score sd_Emo_Score
  <fct>       <int>          <dbl>        <dbl>
1 Non-PCOS      155           2.03        0.462
2 PCOS          149           1.91        0.527

plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Emo_Score), is.finite(Emo_Score), !is.na(Emo_Score))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Emo_Score)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  coord_cartesian(ylim = c(0, 4)) +
  labs(
    title = "Emotion_Score by PCOS Status",
    x = "PCOS Status",
    y = "Emo_Score"
  )

Warning: Removed 10 rows containing non-finite outside the scale range
(`stat_boxplot()`).

Warning: Removed 10 rows containing missing values or values outside the scale range
(`geom_point()`).

#### Respect Score

Respect_Score_model <- aov(Respect_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Respect_Score ~ PCOS_Status, data = PCOS_Data_Scored)


    Welch Two Sample t-test

data:  Respect_Score by PCOS_Status
t = 1.0008, df = 296.98, p-value = 0.3177
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -0.05883484  0.18059280
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              1.654839               1.593960

cohens_d(
  Respect_Score ~ PCOS_Status,
  data = PCOS_Data_Scored
)

Cohen's d |        95% CI
-------------------------
0.11      | [-0.11, 0.34]

- Estimated using pooled SD.

summary(Respect_Score_model)

             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1   0.28  0.2816   0.995  0.319
Residuals   302  85.47  0.2830

PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Respect_Score = mean(Respect_Score, na.rm = TRUE),
    sd_Respect_Score = sd(Respect_Score, na.rm = TRUE)
  )

# A tibble: 2 × 4
  PCOS_Status     n mean_Respect_Score sd_Respect_Score
  <fct>       <int>              <dbl>            <dbl>
1 Non-PCOS      155               1.65            0.574
2 PCOS          149               1.59            0.484

plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Respect_Score), is.finite(Respect_Score), !is.na(Respect_Score))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Respect_Score)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  coord_cartesian(ylim = c(0, 4)) +
  labs(
    title = "Respect_Score by PCOS Status",
    x = "PCOS Status",
    y = "Respect_Score"
  )

#### Quality Communication

Quality_Communication_Score_model <- aov(Quality_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Quality_Communication ~ PCOS_Status, data = PCOS_Data_Scored)


    Welch Two Sample t-test

data:  Quality_Communication by PCOS_Status
t = -0.07943, df = 300.81, p-value = 0.9367
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -2.775589  2.560218
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              47.17419               47.28188

cohens_d(
  Quality_Communication ~ PCOS_Status,
  data = PCOS_Data_Scored
)

Cohen's d |        95% CI
-------------------------
-9.09e-03 | [-0.23, 0.22]

- Estimated using pooled SD.

summary(Quality_Communication_Score_model)

             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1      1    0.88   0.006  0.937
Residuals   302  42340  140.20

PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Quality_Communication = mean(Quality_Communication, na.rm = TRUE),
    sd_Quality_Communication = sd(Quality_Communication, na.rm = TRUE)
  )

# A tibble: 2 × 4
  PCOS_Status     n mean_Quality_Communication sd_Quality_Communication
  <fct>       <int>                      <dbl>                    <dbl>
1 Non-PCOS      155                       47.2                     12.4
2 PCOS          149                       47.3                     11.2

plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Quality_Communication), is.finite(Quality_Communication), !is.na(Quality_Communication))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Quality_Communication)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  coord_cartesian(ylim = c(15, 70)) +
  labs(
    title = "Quality_Communication_Score by PCOS Status",
    x = "PCOS Status",
    y = "Quality_Communication_Score"
  )

##### Negative Communication

Negative_Communication_Score_model <- aov(Negative_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Negative_Communication ~ PCOS_Status, data = PCOS_Data_Scored)


    Welch Two Sample t-test

data:  Negative_Communication by PCOS_Status
t = -0.70705, df = 296.89, p-value = 0.4801
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -0.9357275  0.4410750
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              7.141935               7.389262

cohens_d(
  Negative_Communication ~ PCOS_Status,
  data = PCOS_Data_Scored
)

Cohen's d |        95% CI
-------------------------
-0.08     | [-0.31, 0.14]

- Estimated using pooled SD.

summary(Negative_Communication_Score_model)

             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1    4.6   4.647   0.497  0.482
Residuals   302 2826.3   9.359

PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Negative_Communication = mean(Negative_Communication, na.rm = TRUE),
    sd_Negative_Communication = sd(Negative_Communication, na.rm = TRUE)
  )

# A tibble: 2 × 4
  PCOS_Status     n mean_Negative_Communication sd_Negative_Communication
  <fct>       <int>                       <dbl>                     <dbl>
1 Non-PCOS      155                        7.14                      3.30
2 PCOS          149                        7.39                      2.78

plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Negative_Communication), is.finite (Negative_Communication), !is.na(Negative_Communication))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Negative_Communication)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  coord_cartesian(ylim = c(3, 18)) +
  labs(
    title = "Negative_Communication_Score by PCOS Status",
    x = "PCOS Status",
    y = "Negative_Communication_Score"
  )

#### Overall Communicataion

Overall_Communication_Score_model <- aov(Overall_Communication ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Overall_Communication ~ PCOS_Status, data = PCOS_Data_Scored)


    Welch Two Sample t-test

data:  Overall_Communication by PCOS_Status
t = 0.086906, df = 298.77, p-value = 0.9308
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -3.022427  3.301708
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              60.03226               59.89262

cohens_d(
  Overall_Communication ~ PCOS_Status,
  data = PCOS_Data_Scored
)

Cohen's d |        95% CI
-------------------------
9.94e-03  | [-0.21, 0.23]

- Estimated using pooled SD.

summary(Overall_Communication_Score_model)

             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1      1    1.48   0.008  0.931
Residuals   302  59571  197.26

PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Overall_Communication = mean(Overall_Communication, na.rm = TRUE),
    sd_Overall_Communication = sd(Overall_Communication, na.rm = TRUE)
  )

# A tibble: 2 × 4
  PCOS_Status     n mean_Overall_Communication sd_Overall_Communication
  <fct>       <int>                      <dbl>                    <dbl>
1 Non-PCOS      155                       60.0                     15.0
2 PCOS          149                       59.9                     13.0

plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Overall_Communication), is.finite (Overall_Communication), !is.na(Overall_Communication))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Overall_Communication)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
  coord_cartesian(ylim = c(19, 85)) +
  labs(
    title = "Overall_Communication by PCOS Status",
    x = "PCOS Status",
    y = "Overall_Communication_Score"
  )

#### Trust Score

Trust_Score_model <- aov(Trust_Score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Trust_Score ~ PCOS_Status, data = PCOS_Data_Scored)


    Welch Two Sample t-test

data:  Trust_Score by PCOS_Status
t = -0.57959, df = 292.87, p-value = 0.5626
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -0.2451808  0.1336249
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              2.583871               2.639649

cohens_d(
  Trust_Score ~ PCOS_Status,
  data = PCOS_Data_Scored
)

Cohen's d |        95% CI
-------------------------
-0.07     | [-0.29, 0.16]

- Estimated using pooled SD.

summary(Trust_Score_model)

             Df Sum Sq Mean Sq F value Pr(>F)
PCOS_Status   1   0.24  0.2364   0.333  0.564
Residuals   302 214.31  0.7096

PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Trust_Score = mean(Trust_Score, na.rm = TRUE),
    sd_Trust_Score = sd(Trust_Score, na.rm = TRUE)
  )

# A tibble: 2 × 4
  PCOS_Status     n mean_Trust_Score sd_Trust_Score
  <fct>       <int>            <dbl>          <dbl>
1 Non-PCOS      155             2.58          0.927
2 PCOS          149             2.64          0.745

plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Trust_Score), is.finite (Trust_Score), !is.na(Trust_Score))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Trust_Score)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
    labs(
    title = "Trust_Score by PCOS Status",
    x = "PCOS Status",
    y = "Trust_Score"
  )

#### Hirsutism

Hirsutism_score_model <- aov(Hirsutism_score ~ PCOS_Status, data = PCOS_Data_Scored)
t.test(Hirsutism_score ~ PCOS_Status, data = PCOS_Data_Scored)


    Welch Two Sample t-test

data:  Hirsutism_score by PCOS_Status
t = -6.7147, df = 276.98, p-value = 1.065e-10
alternative hypothesis: true difference in means between group Non-PCOS and group PCOS is not equal to 0
95 percent confidence interval:
 -5.760408 -3.148533
sample estimates:
mean in group Non-PCOS     mean in group PCOS 
              3.619355               8.073826

cohens_d(
  Hirsutism_score ~ PCOS_Status,
  data = PCOS_Data_Scored
)

Cohen's d |         95% CI
--------------------------
-0.77     | [-1.01, -0.54]

- Estimated using pooled SD.

summary(Hirsutism_score_model)

             Df Sum Sq Mean Sq F value   Pr(>F)    
PCOS_Status   1   1507  1507.4   45.56 7.58e-11 ***
Residuals   302   9993    33.1                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

PCOS_Data_Scored |>
  group_by(PCOS_Status) |>
  summarise(
    n = n(),
    mean_Hirsutism_score = mean(Hirsutism_score, na.rm = TRUE),
    sd_Hirsutism_score = sd(Hirsutism_score, na.rm = TRUE)
  )

# A tibble: 2 × 4
  PCOS_Status     n mean_Hirsutism_score sd_Hirsutism_score
  <fct>       <int>                <dbl>              <dbl>
1 Non-PCOS      155                 3.62               4.95
2 PCOS          149                 8.07               6.48

plot_data <- PCOS_Data_Scored |>
  filter(!is.na(Hirsutism_score), is.finite (Hirsutism_score), !is.na(Hirsutism_score))


ggplot(PCOS_Data_Scored, aes(x = PCOS_Status, y = Hirsutism_score)) +
  geom_boxplot() +
  geom_jitter(width = 0.1, alpha = 0.4) +
    labs(
    title = "Hirsutism score by PCOS Status",
    x = "PCOS Status",
    y = "Hirsutism score"
  )

\(\chi^2 Analyses\)

vars <- c("Education", "Smoking", "Alcohol", "Exercise_per_day", "Exercise_per_week","Type_of_exercise","Conditions","Hysterectomy","Contraceptive","Alopecia_stage","Acne","Insulin_resistance_medications","Menopause","Stopped_period","Race")



fisher_results <- lapply(vars, function(v) {
  
  tab <- table(PCOS_Data_Scored[[v]], PCOS_Data_Scored$PCOS_Status)
  
  # column percentages (within PCOS group)
  prop_tab <- prop.table(tab, margin = 2) * 100
  
  test <- fisher.test(tab)
  
  data.frame(
    Variable = v,
    p_value = test$p.value,
    levels = nrow(tab),
    pct_table = paste(round(prop_tab, 1), collapse = " | ")
  )
})

fisher_table <- do.call(rbind, fisher_results)

fisher_table

                         Variable      p_value levels
1                       Education 3.245780e-01      5
2                         Smoking 2.785361e-02      2
3                         Alcohol 5.727535e-02      2
4                Exercise_per_day 6.586983e-02      5
5               Exercise_per_week 4.685501e-01      6
6                Type_of_exercise 5.127177e-01      3
7                      Conditions 7.371856e-01     11
8                    Hysterectomy 2.146467e-01      2
9                   Contraceptive 3.119180e-13      2
10                 Alopecia_stage 7.041877e-12      4
11                           Acne 6.634508e-07      2
12 Insulin_resistance_medications 6.218387e-14      2
13                      Menopause 2.832929e-01      2
14                 Stopped_period 1.000000e+00      2
15                           Race 4.847089e-01      3
                                                                                                                   pct_table
1                                                                    0.6 | 0.6 | 29 | 65.2 | 4.5 | 0 | 0 | 37.6 | 57.7 | 4.7
2                                                                                                  21.3 | 78.7 | 32.9 | 67.1
3                                                                                                   86.3 | 13.7 | 93.3 | 6.7
4                                                           18.1 | 19.4 | 32.9 | 8.4 | 21.3 | 10.1 | 16.8 | 47 | 10.1 | 16.1
5                                                10.3 | 26.5 | 19.4 | 7.1 | 5.8 | 31 | 6.7 | 28.2 | 25.5 | 10.1 | 5.4 | 24.2
6                                                                                        49.5 | 41.1 | 9.3 | 46 | 47.8 | 6.2
7  21.4 | 42.9 | 7.1 | 0 | 7.1 | 0 | 7.1 | 7.1 | 0 | 0 | 7.1 | 10.7 | 39.3 | 7.1 | 0 | 14.3 | 3.6 | 17.9 | 0 | 3.6 | 3.6 | 0
8                                                                                                    3.2 | 96.8 | 0.7 | 99.3
9                                                                                                   8.4 | 91.6 | 44.3 | 55.7
10                                                                               91 | 7.7 | 1.3 | 0 | 56.4 | 33.6 | 10.1 | 0
11                                                                                                 40.6 | 59.4 | 69.1 | 30.9
12                                                                                                  2.6 | 97.4 | 34.2 | 65.8
13                                                                                                   3.9 | 96.1 | 1.3 | 98.7
14                                                                                                       3.9 | 96.1 | 4 | 96
15                                                                                   16.8 | 69.7 | 13.5 | 21.5 | 67.8 | 10.7

desc_table <- lapply(vars, function(v) {
  
  PCOS_Data_Scored |>
    count(Level = .data[[v]]) |>
    mutate(
      Variable = v,
      pct = round(n / sum(n) * 100, 1),
      `n (%)` = paste0(n, " (", pct, "%)")
    ) |>
    select(Variable, Level, `n (%)`)
  
}) |>
  bind_rows()

desc_table

                         Variable                   Level       n (%)
1                       Education            No Education    1 (0.3%)
2                       Education                 Primary    1 (0.3%)
3                       Education               Secondary 101 (33.2%)
4                       Education                Tertiary 187 (61.5%)
5                       Education         Post-Vocational   14 (4.6%)
6                         Smoking                     Yes    82 (27%)
7                         Smoking                      No   222 (73%)
8                         Alcohol                     Yes 271 (89.1%)
9                         Alcohol                      No  31 (10.2%)
10                        Alcohol                    <NA>    2 (0.7%)
11               Exercise_per_day            < 10 minutes  43 (14.1%)
12               Exercise_per_day           10-19 minutes  55 (18.1%)
13               Exercise_per_day           10-39 minutes 121 (39.8%)
14               Exercise_per_day           40-59 minutes   28 (9.2%)
15               Exercise_per_day            > 60 minutes  57 (18.8%)
16              Exercise_per_week                   1 day   26 (8.6%)
17              Exercise_per_week                  2 days  83 (27.3%)
18              Exercise_per_week                  3 days  68 (22.4%)
19              Exercise_per_week                  4 days   26 (8.6%)
20              Exercise_per_week                 >5 days   17 (5.6%)
21              Exercise_per_week                    none  84 (27.6%)
22               Type_of_exercise                    Mild 105 (34.5%)
23               Type_of_exercise                Moderate  98 (32.2%)
24               Type_of_exercise               Strenuous   17 (5.6%)
25               Type_of_exercise                    <NA>  84 (27.6%)
26                     Conditions                     T2D      6 (2%)
27                     Conditions                     H2P   17 (5.6%)
28                     Conditions              Heart Cond      3 (1%)
29                     Conditions            Constipation    5 (1.6%)
30                     Conditions                  Cancer    1 (0.3%)
31                     Conditions               T2D & H2P      6 (2%)
32                     Conditions        T2D,H2P,& Cancer    1 (0.3%)
33                     Conditions T2D,H2P,Heart, & Cancer    1 (0.3%)
34                     Conditions      T2D & Constipation    1 (0.3%)
35                     Conditions             T2D & Heart    1 (0.3%)
36                     Conditions                    <NA> 262 (86.2%)
37                   Hysterectomy                     Yes      6 (2%)
38                   Hysterectomy                      No   298 (98%)
39                  Contraceptive                     Yes    79 (26%)
40                  Contraceptive                      No   225 (74%)
41                 Alopecia_stage             no alopecia   225 (74%)
42                 Alopecia_stage                 Stage 1  62 (20.4%)
43                 Alopecia_stage                 Stage 2   17 (5.6%)
44                           Acne                     Yes 166 (54.6%)
45                           Acne                      No 138 (45.4%)
46 Insulin_resistance_medications                     Yes  55 (18.1%)
47 Insulin_resistance_medications                      No 249 (81.9%)
48                      Menopause                     Yes    8 (2.6%)
49                      Menopause                      No 296 (97.4%)
50                 Stopped_period                     Yes   12 (3.9%)
51                 Stopped_period                      No 292 (96.1%)
52                           Race                 African  58 (19.1%)
53                           Race             East Indian 209 (68.8%)
54                           Race                   Mixed  37 (12.2%)

table(PCOS_Data_Scored$PCOS_Status)


Non-PCOS     PCOS 
     155      149

library(dplyr)

vars <- c(
  "Education",
  "Smoking",
  "Alcohol",
  "Exercise_per_day",
  "Exercise_per_week",
  "Type_of_exercise",
  "Conditions",
  "Hysterectomy",
  "Contraceptive",
  "Alopecia_stage",
  "Acne",
  "Insulin_resistance_medications",
  "Menopause",
  "Stopped_period",
  "Race"
)


fisher_results <- lapply(vars, function(v) {
  
  tab <- table(
    PCOS_Data_Scored[[v]],
    PCOS_Data_Scored$PCOS_Status
  )
  
  prop_tab <- round(prop.table(tab, margin = 2) * 100, 1)
  
  test <- fisher.test(tab)
  
  OR <- NA
  LCL <- NA
  UCL <- NA
  OR_CI <- NA
  
  if (all(dim(tab) == c(2, 2))) {
    
    OR  <- unname(test$estimate)
    LCL <- test$conf.int[1]
    UCL <- test$conf.int[2]
    
    OR_CI <- sprintf("%.2f (%.2f, %.2f)", OR, LCL, UCL)
  }
  
  data.frame(
    Variable = v,
    Levels = nrow(tab),
    p_value = round(test$p.value, 4),
    Odds_Ratio = round(OR, 2),
    CI_Lower = round(LCL, 2),
    CI_Upper = round(UCL, 2),
    `OR (95% CI)` = OR_CI,
    pct_table = paste(prop_tab, collapse = " | "),
    stringsAsFactors = FALSE
  )
  
})

fisher_table <- bind_rows(fisher_results)

fisher_table

                         Variable Levels p_value Odds_Ratio CI_Lower CI_Upper
1                       Education      5  0.3246         NA       NA       NA
2                         Smoking      2  0.0279       0.55     0.32     0.95
3                         Alcohol      2  0.0573       0.45     0.18     1.05
4                Exercise_per_day      5  0.0659         NA       NA       NA
5               Exercise_per_week      6  0.4686         NA       NA       NA
6                Type_of_exercise      3  0.5127         NA       NA       NA
7                      Conditions     11  0.7372         NA       NA       NA
8                    Hysterectomy      2  0.2146       4.91     0.54   234.62
9                   Contraceptive      2  0.0000       0.12     0.06     0.23
10                 Alopecia_stage      4  0.0000         NA       NA       NA
11                           Acne      2  0.0000       0.31     0.19     0.50
12 Insulin_resistance_medications      2  0.0000       0.05     0.01     0.15
13                      Menopause      2  0.2833       2.95     0.52    30.35
14                 Stopped_period      2  1.0000       0.96     0.25     3.68
15                           Race      3  0.4847         NA       NA       NA
           OR..95..CI.
1                 <NA>
2    0.55 (0.32, 0.95)
3    0.45 (0.18, 1.05)
4                 <NA>
5                 <NA>
6                 <NA>
7                 <NA>
8  4.91 (0.54, 234.62)
9    0.12 (0.06, 0.23)
10                <NA>
11   0.31 (0.19, 0.50)
12   0.05 (0.01, 0.15)
13  2.95 (0.52, 30.35)
14   0.96 (0.25, 3.68)
15                <NA>
                                                                                                                   pct_table
1                                                                    0.6 | 0.6 | 29 | 65.2 | 4.5 | 0 | 0 | 37.6 | 57.7 | 4.7
2                                                                                                  21.3 | 78.7 | 32.9 | 67.1
3                                                                                                   86.3 | 13.7 | 93.3 | 6.7
4                                                           18.1 | 19.4 | 32.9 | 8.4 | 21.3 | 10.1 | 16.8 | 47 | 10.1 | 16.1
5                                                10.3 | 26.5 | 19.4 | 7.1 | 5.8 | 31 | 6.7 | 28.2 | 25.5 | 10.1 | 5.4 | 24.2
6                                                                                        49.5 | 41.1 | 9.3 | 46 | 47.8 | 6.2
7  21.4 | 42.9 | 7.1 | 0 | 7.1 | 0 | 7.1 | 7.1 | 0 | 0 | 7.1 | 10.7 | 39.3 | 7.1 | 0 | 14.3 | 3.6 | 17.9 | 0 | 3.6 | 3.6 | 0
8                                                                                                    3.2 | 96.8 | 0.7 | 99.3
9                                                                                                   8.4 | 91.6 | 44.3 | 55.7
10                                                                               91 | 7.7 | 1.3 | 0 | 56.4 | 33.6 | 10.1 | 0
11                                                                                                 40.6 | 59.4 | 69.1 | 30.9
12                                                                                                  2.6 | 97.4 | 34.2 | 65.8
13                                                                                                   3.9 | 96.1 | 1.3 | 98.7
14                                                                                                       3.9 | 96.1 | 4 | 96
15                                                                                   16.8 | 69.7 | 13.5 | 21.5 | 67.8 | 10.7

desc_table <- lapply(vars, function(v) {
  
  PCOS_Data_Scored |>
    count(Level = .data[[v]]) |>
    mutate(
      Variable = v,
      pct = round(100 * n / sum(n), 1),
      `n (%)` = paste0(n, " (", pct, "%)")
    ) |>
    select(Variable, Level, `n (%)`)
  
}) |>
  bind_rows()

desc_table

                         Variable                   Level       n (%)
1                       Education            No Education    1 (0.3%)
2                       Education                 Primary    1 (0.3%)
3                       Education               Secondary 101 (33.2%)
4                       Education                Tertiary 187 (61.5%)
5                       Education         Post-Vocational   14 (4.6%)
6                         Smoking                     Yes    82 (27%)
7                         Smoking                      No   222 (73%)
8                         Alcohol                     Yes 271 (89.1%)
9                         Alcohol                      No  31 (10.2%)
10                        Alcohol                    <NA>    2 (0.7%)
11               Exercise_per_day            < 10 minutes  43 (14.1%)
12               Exercise_per_day           10-19 minutes  55 (18.1%)
13               Exercise_per_day           10-39 minutes 121 (39.8%)
14               Exercise_per_day           40-59 minutes   28 (9.2%)
15               Exercise_per_day            > 60 minutes  57 (18.8%)
16              Exercise_per_week                   1 day   26 (8.6%)
17              Exercise_per_week                  2 days  83 (27.3%)
18              Exercise_per_week                  3 days  68 (22.4%)
19              Exercise_per_week                  4 days   26 (8.6%)
20              Exercise_per_week                 >5 days   17 (5.6%)
21              Exercise_per_week                    none  84 (27.6%)
22               Type_of_exercise                    Mild 105 (34.5%)
23               Type_of_exercise                Moderate  98 (32.2%)
24               Type_of_exercise               Strenuous   17 (5.6%)
25               Type_of_exercise                    <NA>  84 (27.6%)
26                     Conditions                     T2D      6 (2%)
27                     Conditions                     H2P   17 (5.6%)
28                     Conditions              Heart Cond      3 (1%)
29                     Conditions            Constipation    5 (1.6%)
30                     Conditions                  Cancer    1 (0.3%)
31                     Conditions               T2D & H2P      6 (2%)
32                     Conditions        T2D,H2P,& Cancer    1 (0.3%)
33                     Conditions T2D,H2P,Heart, & Cancer    1 (0.3%)
34                     Conditions      T2D & Constipation    1 (0.3%)
35                     Conditions             T2D & Heart    1 (0.3%)
36                     Conditions                    <NA> 262 (86.2%)
37                   Hysterectomy                     Yes      6 (2%)
38                   Hysterectomy                      No   298 (98%)
39                  Contraceptive                     Yes    79 (26%)
40                  Contraceptive                      No   225 (74%)
41                 Alopecia_stage             no alopecia   225 (74%)
42                 Alopecia_stage                 Stage 1  62 (20.4%)
43                 Alopecia_stage                 Stage 2   17 (5.6%)
44                           Acne                     Yes 166 (54.6%)
45                           Acne                      No 138 (45.4%)
46 Insulin_resistance_medications                     Yes  55 (18.1%)
47 Insulin_resistance_medications                      No 249 (81.9%)
48                      Menopause                     Yes    8 (2.6%)
49                      Menopause                      No 296 (97.4%)
50                 Stopped_period                     Yes   12 (3.9%)
51                 Stopped_period                      No 292 (96.1%)
52                           Race                 African  58 (19.1%)
53                           Race             East Indian 209 (68.8%)
54                           Race                   Mixed  37 (12.2%)

table(PCOS_Data_Scored$PCOS_Status)


Non-PCOS     PCOS 
     155      149

table(PCOS_Data_Scored$PCOS_Status)


Non-PCOS     PCOS 
     155      149

table(PCOS_Data_Scored$PCOS_Status,PCOS_Data_Scored$Acne)

          
           Yes  No
  Non-PCOS  63  92
  PCOS     103  46

table(PCOS_Data_Scored$Race,PCOS_Data_Scored$PCOS_Status)

             
              Non-PCOS PCOS
  African           26   32
  East Indian      108  101
  Mixed             21   16

library(gt)


vars <- c(
  "Education", "Smoking", "Alcohol", "Exercise_per_day",
  "Exercise_per_week", "Type_of_exercise", "Conditions",
  "Hysterectomy", "Contraceptive", "Alopecia_stage",
  "Acne", "Insulin_resistance_medications",
  "Menopause", "Stopped_period", "Race"
)

group_var <- "PCOS_Status"


var_labels <- c(
  Education = "Education Level",
  Smoking = "Smoking Status",
  Alcohol = "Alcohol Use",
  Exercise_per_day = "Exercise (per day)",
  Exercise_per_week = "Exercise (per week)",
  Type_of_exercise = "Type of Exercise",
  Conditions = "Comorbid Conditions",
  Hysterectomy = "History of Hysterectomy",
  Contraceptive = "Contraceptive Use",
  Alopecia_stage = "Alopecia Stage",
  Acne = "Acne",
  Insulin_resistance_medications = "IR Medications",
  Menopause = "Menopause Status",
  Stopped_period = "Stopped Period",
  Race = "Race"
)

table1 <- map_dfr(vars, function(v) {
  
  tab <- table(PCOS_Data_Scored[[v]], PCOS_Data_Scored[[group_var]])
  
  test <- if (any(tab < 5)) {
    fisher.test(tab)
  } else {
    chisq.test(tab)
  }
  
  df <- as.data.frame(tab) |>
    rename(Level = Var1, Group = Var2, n = Freq) |>
    mutate(
      Level = as.character(Level),
      Group = as.character(Group)
    ) |>
    group_by(Group) |>
    mutate(
      pct = round(n / sum(n) * 100, 1),
      `n (%)` = paste0(n, " (", pct, "%)")
    ) |>
    ungroup()
  
  df_wide <- df |>
    select(Level, Group, `n (%)`) |>
    pivot_wider(names_from = Group, values_from = `n (%)`)
  
  df_wide |>
    mutate(
      Variable = v,
      p_value = test$p.value,
      .before = 1
    )
  
}) |>
  group_by(Variable) |>
  mutate(
    p_value = ifelse(row_number() == 1, p_value, NA)
  ) |>
  ungroup()

table1 <- table1 |>
  mutate(
    Variable = recode(Variable, !!!var_labels),
    
    # indent levels for readability
    Level = paste0("   ", Level),
    
    # format p-values
    p_value = case_when(
      is.na(p_value) ~ "",
      p_value < 0.001 ~ "<0.001",
      TRUE ~ sprintf("%.3f", p_value)
    )
  )


gt_table <- table1 |>
  gt() |>
  cols_label(
    Variable = "",
    Level = "",
    p_value = "p-value"
  ) |>
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      columns = Variable,
      rows = !duplicated(Variable)
    )
  ) |>
  cols_align(
    align = "center",
    -c(Variable, Level)
  )


gt_table

	p-value		Non-PCOS	PCOS
Education Level	0.325	No Education	1 (0.6%)	0 (0%)
Education Level		Primary	1 (0.6%)	0 (0%)
Education Level		Secondary	45 (29%)	56 (37.6%)
Education Level		Tertiary	101 (65.2%)	86 (57.7%)
Education Level		Post-Vocational	7 (4.5%)	7 (4.7%)
Smoking Status	0.032	Yes	33 (21.3%)	49 (32.9%)
Smoking Status		No	122 (78.7%)	100 (67.1%)
Alcohol Use	0.069	Yes	132 (86.3%)	139 (93.3%)
Alcohol Use		No	21 (13.7%)	10 (6.7%)
Exercise (per day)	0.066	< 10 minutes	28 (18.1%)	15 (10.1%)
Exercise (per day)		10-19 minutes	30 (19.4%)	25 (16.8%)
Exercise (per day)		10-39 minutes	51 (32.9%)	70 (47%)
Exercise (per day)		40-59 minutes	13 (8.4%)	15 (10.1%)
Exercise (per day)		> 60 minutes	33 (21.3%)	24 (16.1%)
Exercise (per week)	0.465	1 day	16 (10.3%)	10 (6.7%)
Exercise (per week)		2 days	41 (26.5%)	42 (28.2%)
Exercise (per week)		3 days	30 (19.4%)	38 (25.5%)
Exercise (per week)		4 days	11 (7.1%)	15 (10.1%)
Exercise (per week)		>5 days	9 (5.8%)	8 (5.4%)
Exercise (per week)		none	48 (31%)	36 (24.2%)
Type of Exercise	0.497	Mild	53 (49.5%)	52 (46%)
Type of Exercise		Moderate	44 (41.1%)	54 (47.8%)
Type of Exercise		Strenuous	10 (9.3%)	7 (6.2%)
Comorbid Conditions	0.737	T2D	3 (21.4%)	3 (10.7%)
Comorbid Conditions		H2P	6 (42.9%)	11 (39.3%)
Comorbid Conditions		Heart Cond	1 (7.1%)	2 (7.1%)
Comorbid Conditions		PCOS	0 (0%)	0 (0%)
Comorbid Conditions		Constipation	1 (7.1%)	4 (14.3%)
Comorbid Conditions		Cancer	0 (0%)	1 (3.6%)
Comorbid Conditions		T2D & H2P	1 (7.1%)	5 (17.9%)
Comorbid Conditions		T2D,H2P,& Cancer	1 (7.1%)	0 (0%)
Comorbid Conditions		T2D,H2P,Heart, & Cancer	0 (0%)	1 (3.6%)
Comorbid Conditions		T2D & Constipation	0 (0%)	1 (3.6%)
Comorbid Conditions		T2D & Heart	1 (7.1%)	0 (0%)
History of Hysterectomy	0.215	Yes	5 (3.2%)	1 (0.7%)
History of Hysterectomy		No	150 (96.8%)	148 (99.3%)
Contraceptive Use	<0.001	Yes	13 (8.4%)	66 (44.3%)
Contraceptive Use		No	142 (91.6%)	83 (55.7%)
Alopecia Stage	<0.001	no alopecia	141 (91%)	84 (56.4%)
Alopecia Stage		Stage 1	12 (7.7%)	50 (33.6%)
Alopecia Stage		Stage 2	2 (1.3%)	15 (10.1%)
Alopecia Stage		Stage 3	0 (0%)	0 (0%)
Acne	<0.001	Yes	63 (40.6%)	103 (69.1%)
Acne		No	92 (59.4%)	46 (30.9%)
IR Medications	<0.001	Yes	4 (2.6%)	51 (34.2%)
IR Medications		No	151 (97.4%)	98 (65.8%)
Menopause Status	0.283	Yes	6 (3.9%)	2 (1.3%)
Menopause Status		No	149 (96.1%)	147 (98.7%)
Stopped Period	1.000	Yes	6 (3.9%)	6 (4%)
Stopped Period		No	149 (96.1%)	143 (96%)
Race	0.493	African	26 (16.8%)	32 (21.5%)
Race		East Indian	108 (69.7%)	101 (67.8%)
Race		Mixed	21 (13.5%)	16 (10.7%)

library(flextable)


Attaching package: 'flextable'

The following objects are masked from 'package:kableExtra':

    as_image, footnote

The following object is masked from 'package:purrr':

    compose

library(officer)

ft <- flextable(table1)

ft <- ft |>
  autofit() |>
  theme_booktabs() |>
  align(align = "center", part = "all") |>
  bold(i = ~ !duplicated(Variable), j = 1)  # bold variable names

doc <- read_docx() |>
  body_add_par("Table 1. Descriptive Characteristics by PCOS Status", style = "heading 1") |>
  body_add_flextable(ft)

print(doc, target = "Table1.docx")

cont_vars <- c(
  "Age", "Weight", "Info_Score", "Emo_Score", "Respect_Score",
  "Quality_Communication", "Negative_Communication",
  "Overall_Communication", "Trust_Score", "Hirsutism_score"
)

group_var <- "PCOS_Status"

var_labels <- c(
  Age = "Age",
  Weight = "Weight",
  Info_Score = "Information Score",
  Emo_Score = "Emotion Score",
  Respect_Score = "Respect Score",
  Quality_Communication = "Quality Communication",
  Negative_Communication = "Negative Communication",
  Overall_Communication = "Overall Communication",
  Trust_Score = "Trust Score",
  Hirsutism_score = "Hirsutism Score"
)

table2 <- map_dfr(cont_vars, function(v) {
  
  #
  test <- t.test(PCOS_Data_Scored[[v]] ~ PCOS_Data_Scored[[group_var]])
  
  # group summaries
  sum_stats <- PCOS_Data_Scored |>
    group_by(.data[[group_var]]) |>
    summarise(
      mean = mean(.data[[v]], na.rm = TRUE),
      sd = sd(.data[[v]], na.rm = TRUE),
      .groups = "drop"
    ) |>
    mutate(
      `Mean (SD)` = sprintf("%.2f (%.2f)", mean, sd)
    )
  

  wide <- sum_stats |>
    select(all_of(group_var), `Mean (SD)`) |>
    pivot_wider(names_from = all_of(group_var), values_from = `Mean (SD)`)
  
  wide |>
    mutate(
      Variable = v,
      p_value = test$p.value,
      .before = 1
    )
})

table2 <- table2 |>
  mutate(
    Variable = recode(Variable, !!!var_labels),
    
    p_value = case_when(
      p_value < 0.001 ~ "<0.001",
      TRUE ~ sprintf("%.3f", p_value)
    )
  )


ft <- flextable(table2)

ft <- ft |>
  autofit() |>
  theme_booktabs() |>
  align(align = "center", part = "all") |>
  bold(j = 1) |>
  set_header_labels(
    Variable = "",
    p_value = "p-value"
  )


doc <- read_docx() |>
  body_add_par("Table 2. Continuous Variables by PCOS Status", style = "heading 1") |>
  body_add_flextable(ft)

print(doc, target = "Table2_PCOS.docx")

Table with stats values:

library(dplyr)
library(purrr)
library(tidyr)
library(flextable)
library(officer)


cont_vars <- c(
  "Age", "Weight", "Info_Score", "Emo_Score", "Respect_Score",
  "Quality_Communication", "Negative_Communication",
  "Overall_Communication", "Trust_Score", "Hirsutism_score"
)

group_var <- "PCOS_Status"

var_labels <- c(
  Age = "Age",
  Weight = "Weight",
  Info_Score = "Information Score",
  Emo_Score = "Emotion Score",
  Respect_Score = "Respect Score",
  Quality_Communication = "Quality Communication",
  Negative_Communication = "Negative Communication",
  Overall_Communication = "Overall Communication",
  Trust_Score = "Trust Score",
  Hirsutism_score = "Hirsutism Score"
)

table2 <- map_dfr(cont_vars, function(v) {
 
  df <- PCOS_Data_Scored |>
    filter(!is.na(.data[[v]]), !is.na(.data[[group_var]])) |>
    mutate(group = as.factor(.data[[group_var]]))
  
  levels_group <- levels(df$group)

  test <- t.test(df[[v]] ~ df$group)
  
  # group summaries
  sum_stats <- df |>
    group_by(group) |>
    summarise(
      mean = mean(.data[[v]], na.rm = TRUE),
      sd = sd(.data[[v]], na.rm = TRUE),
      .groups = "drop"
    ) |>
    mutate(
      Mean_SD = sprintf("%.2f (%.2f)", mean, sd)
    )
  
  # reshape wide
  wide <- sum_stats |>
    select(group, Mean_SD) |>
    pivot_wider(names_from = group, values_from = Mean_SD)
  
  # mean difference (Group1 - Group2)
  est <- test$estimate
  mean_diff <- unname(est[1] - est[2])
  
  wide |>
    mutate(
      Variable = v,
      Mean_Diff = sprintf("%.2f", mean_diff),
      CI = sprintf("(%.2f, %.2f)", test$conf.int[1], test$conf.int[2]),
      t_value = sprintf("%.2f", unname(test$statistic)),
      p_value = case_when(
        is.na(test$p.value) ~ "",
        test$p.value < 0.001 ~ "<0.001",
        TRUE ~ sprintf("%.3f", test$p.value)
      ),
      .before = 1
    )
})


table2 <- table2 |>
  mutate(
    Variable = recode(Variable, !!!var_labels)
  )


ft <- flextable(table2)

ft <- ft |>
  theme_booktabs() |>
  autofit() |>
  align(align = "center", part = "all") |>
  bold(j = 1) |>
  set_header_labels(
    Variable = "",
    Mean_Diff = "Mean Difference",
    CI = "95% CI",
    t_value = "t",
    p_value = "p-value"
  )

doc <- read_docx() |>
  body_add_par("Table 2. Continuous Variables by PCOS Status", style = "heading 1") |>
  body_add_par("Values are mean (SD) unless otherwise indicated. Mean difference represents Group 1 minus Group 2.", style = "Normal") |>
  body_add_flextable(ft)

print(doc, target = "Table2_PCOS.docx")

table(PCOS_Data_Scored$PCOS_Status)


Non-PCOS     PCOS 
     155      149

table1 <- map_dfr(vars, function(v) {
  
  tab <- table(PCOS_Data_Scored[[v]], PCOS_Data_Scored[[group_var]])
  
  use_fisher <- any(tab < 5)
  
  test <- if (use_fisher) {
    fisher.test(tab)
  } else {
    chisq.test(tab)
  }
  
  df <- as.data.frame(tab) |>
    rename(Level = Var1, Group = Var2, n = Freq) |>
    mutate(
      Level = as.character(Level),
      Group = as.character(Group)
    ) |>
    group_by(Group) |>
    mutate(
      pct = round(n / sum(n) * 100, 1),
      `n (%)` = paste0(n, " (", pct, "%)")
    ) |>
    ungroup()
  
  df_wide <- df |>
    select(Level, Group, `n (%)`) |>
    pivot_wider(names_from = Group, values_from = `n (%)`)
  
  df_wide |>
    mutate(
      Variable = v,
      test_type = ifelse(use_fisher, "Fisher", "χ²"),
      statistic = ifelse(use_fisher, NA, unname(test$statistic)),
      p_value = test$p.value,
      .before = 1
    )
  
}) |>
  group_by(Variable) |>
  mutate(
    p_value = ifelse(row_number() == 1, p_value, NA),
    test_type = ifelse(row_number() == 1, test_type, NA),
    statistic = ifelse(row_number() == 1, statistic, NA)
  ) |>
  ungroup()


table1 <- table1 |>
  mutate(
    Variable = recode(Variable, !!!var_labels),
    
    Level = paste0("   ", Level),
    
    p_value = case_when(
      is.na(p_value) ~ "",
      p_value < 0.001 ~ "<0.001",
      TRUE ~ sprintf("%.3f", p_value)
    ),
    
    statistic = case_when(
      is.na(statistic) ~ "",
      TRUE ~ sprintf("%.2f", statistic)
    )
  )


ft <- flextable(table1)

ft <- ft |>
  autofit() |>
  theme_booktabs() |>
  align(align = "center", part = "all") |>
  bold(i = ~ !duplicated(Variable), j = 1) |>
  set_header_labels(
    Variable = "",
    Level = "",
    test_type = "Test",
    statistic = "Statistic",
    p_value = "p-value"
  )


doc <- read_docx() |>
  body_add_par("Table 1. Descriptive Characteristics by PCOS Status", style = "heading 1") |>
  body_add_par("Values are n (%). χ² = Chi-square test; Fisher = Fisher’s exact test.", style = "Normal") |>
  body_add_flextable(ft)

print(doc, target = "Table1.docx")