Load Libraries

library(dplyr)
library(RVAideMemoire)
library(ggplot2)
library(tidyr)
library(kableExtra)
library(formattable)

Load Data

df <- read.csv(file="new_data/export.csv", header=T)
traj <- read.csv(file="df_8.csv", header=T)

df$trajectory_bu <- df$trajectory

df$trajectory[df$trajectory_bu == "1"] <- "Class 3, n = 11"
df$trajectory[df$trajectory_bu == "2"] <- "Class 6, n = 2"
df$trajectory[df$trajectory_bu == "3"] <- "Class 5, n = 13"
df$trajectory[df$trajectory_bu == "4"] <- "Class 2, n = 28"
df$trajectory[df$trajectory_bu == "5"] <- "Class 4, n = 17"
df$trajectory[df$trajectory_bu == "6"] <- "Class 1, n = 16"
df$trajectory[df$trajectory_bu == "7"] <- "Class 8, n = 3"
df$trajectory[df$trajectory_bu == "8"] <- "Class 7, n = 3"

df$trajectory <- factor(df$trajectory, levels = c("Class 1, n = 16",
                                                      "Class 2, n = 28",
                                                      "Class 3, n = 11",
                                                      "Class 4, n = 17",
                                                      "Class 5, n = 13",
                                                      "Class 6, n = 2",
                                                      "Class 7, n = 3",
                                                      "Class 8, n = 3"))

Trajectory x Grade

Statistical Comparison

Using Fisher’s Exact Test, since some cell sizes are zero

Fisher’s is significant (p = .050) but posthocs are not

grades <- subset(df, select=c(altid, BIOL_Grade))
ids <- unique(grades$altid)
out <- data.frame()
n <- 1

for (i in 1:93) {
  id <- ids[n]
  temp <- subset(grades, altid == id)
  
  if (all(temp$BIOL_Grade %in% c("A", "A+", "A-"))) {
    out <- rbind(out, data.frame(altid = id, status = "high"))
  } else if (any(temp$BIOL_Grade %in% c("C", "C+", "D", "F", "NP"))) {
    out <- rbind(out, data.frame(altid = id, status = "low"))
  } else if (any(temp$BIOL_Grade %in% c("B", "B-", "B+"))) {
    out <- rbind(out, data.frame(altid = id, status = "mid"))
  }
  
  n <- n + 1
}

df2 <- unique(subset(df, select=c(altid, trajectory)))
df3 <- merge(out, df2, by = "altid")

table(df3$status)
## 
## high  low  mid 
##   50   15   28
table(df3$trajectory, df3$status)
##                  
##                   high low mid
##   Class 1, n = 16    4   5   7
##   Class 2, n = 28   13   5  10
##   Class 3, n = 11    4   3   4
##   Class 4, n = 17   12   1   4
##   Class 5, n = 13   12   0   1
##   Class 6, n = 2     1   0   1
##   Class 7, n = 3     2   0   1
##   Class 8, n = 3     2   1   0
# Perform Fisher's Exact Test
# Increase workspace size
xtab <- table(df3$trajectory, df3$status)
fisher.test(xtab, workspace = 1e+09)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  xtab
## p-value = 0.04961
## alternative hypothesis: two.sided
fisher.multcomp(xtab)
## 
##         Pairwise comparisons using Fisher's exact test for count data
## 
## data:  xtab
## 
##                                 high:low high:mid low:mid
## Class 1, n = 16:Class 2, n = 28   1.0000   1.0000       1
## Class 1, n = 16:Class 3, n = 11   1.0000   1.0000       1
## Class 1, n = 16:Class 4, n = 17   0.6068   0.6897       1
## Class 1, n = 16:Class 5, n = 13   0.3279   0.3279       1
## Class 1, n = 16:Class 6, n = 2    1.0000   1.0000       1
## Class 1, n = 16:Class 7, n = 3    1.0000   1.0000       1
## Class 1, n = 16:Class 8, n = 3    1.0000   1.0000       1
## Class 2, n = 28:Class 3, n = 11   1.0000   1.0000       1
## Class 2, n = 28:Class 4, n = 17   1.0000   1.0000       1
## Class 2, n = 28:Class 5, n = 13   0.6897   0.6068       1
## Class 2, n = 28:Class 6, n = 2    1.0000   1.0000       1
## Class 2, n = 28:Class 7, n = 3    1.0000   1.0000       1
## Class 2, n = 28:Class 8, n = 3    1.0000   1.0000       1
## Class 3, n = 11:Class 4, n = 17   0.9439   1.0000       1
## Class 3, n = 11:Class 5, n = 13   0.6068   0.6646       1
## Class 3, n = 11:Class 6, n = 2    1.0000   1.0000       1
## Class 3, n = 11:Class 7, n = 3    1.0000   1.0000       1
## Class 3, n = 11:Class 8, n = 3    1.0000   1.0000       1
## Class 4, n = 17:Class 5, n = 13   1.0000   1.0000       1
## Class 4, n = 17:Class 6, n = 2    1.0000   1.0000       1
## Class 4, n = 17:Class 7, n = 3    1.0000   1.0000       1
## Class 4, n = 17:Class 8, n = 3    1.0000   1.0000       1
## Class 5, n = 13:Class 6, n = 2    1.0000   1.0000       1
## Class 5, n = 13:Class 7, n = 3    1.0000   1.0000       1
## Class 5, n = 13:Class 8, n = 3    1.0000   1.0000       1
## Class 6, n = 2:Class 7, n = 3     1.0000   1.0000       1
## Class 6, n = 2:Class 8, n = 3     1.0000   1.0000       1
## Class 7, n = 3:Class 8, n = 3     1.0000   1.0000       1
## 
## P value adjustment method: fdr

Plots

convert_grade <- function(grade) {
  case_when(
    grade %in% c("A+", "A") ~ 4,
    grade == "A-" ~ 3.7,
    grade == "B+" ~ 3.3,
    grade == "B" ~ 3,
    grade == "B-" ~ 2.7,
    grade == "C+" ~ 2.3,
    grade == "C" ~ 2,
    grade == "C-" ~ 1.7,
    grade == "D+" ~ 1.3,
    grade == "D" ~ 1,
    grade == "D-" ~ 0.7,
    grade == "F" ~ 0,
    grade == "NP" ~ 0,
    TRUE ~ NA_real_  # Handle any other cases (shouldn't happen with this setup)
  )
}

# Apply the function to the BIOL_Grade column
df <- df %>%
  mutate(grade1 = convert_grade(BIOL_Grade))

df$altid <- as.factor(df$altid)
df$trajectory <- as.factor(df$trajectory)
df$BIOL_Crse <- factor(df$BIOL_Crse, levels = c("F040A", "F040B", "F040C"))

tempdf <- unique(subset(df, select=c(altid, trajectory, BIOL_Crse, grade1)))

tempdf_filtered <- tempdf %>%
  group_by(altid) %>%
  filter(n() <= 3)  # Keep participants who have three or fewer rows

tempdf_filtered <- subset(tempdf_filtered, altid != 982)

ggplot(tempdf_filtered, aes(x = BIOL_Crse, y = grade1, group = altid)) +
  geom_line(size = .5, alpha = .25) +  # Adjust line size if necessary
  labs(title = "Student Grades by Class and Trajectory",
       x = "Class", y = "Grade") +
  scale_color_discrete(name = "Trajectory") +  # Customize legend title
  facet_wrap(~trajectory) +
  theme_minimal()  # Optional: Use a minimal theme

# overlay trajectories over grade plots?

summary_table <- traj %>%
  group_by(classn, courseid) %>%
  summarize(avg_std_id = mean(std_id, na.rm = TRUE)) %>%
  ungroup()

tempdf_filtered$grade2 <- scale(tempdf_filtered$grade1, center=T, scale=T)
summary_table$BIOL_Crse <- NA
summary_table$BIOL_Crse[summary_table$courseid == "1 40A Pre"] <- "F040A"
summary_table$BIOL_Crse[summary_table$courseid == "3 mid"] <- "F040B"
summary_table$BIOL_Crse[summary_table$courseid == "6 40C Post"] <- "F040C"
summary_table <- subset(summary_table, select=-c(courseid))
summary_table$classn <- as.factor(summary_table$classn)

merged_df <- tempdf_filtered %>%
  left_join(summary_table, by = c("trajectory" = "classn", "BIOL_Crse" = "BIOL_Crse"))

ggplot(merged_df) +
  geom_line(size = .5, alpha = .25, aes(x = BIOL_Crse, y = grade2, group = altid)) + 
  geom_line(aes(x = BIOL_Crse, y = avg_std_id, group = trajectory), size = 1, color = "red", alpha = 1) + 
  labs(title = "Standardized Student Grades by Class and Trajectory with ID Overlaid",
       x = "Class", y = "Grade") + ylim(-3,2) +
  scale_color_discrete(name = "Trajectory") +  # Customize legend title
  facet_wrap(~trajectory) +
  theme_minimal()  # Optional: Use a minimal theme

Trajectory x Demographics

Career Goals

Using Fisher’s Exact Test, since some cell sizes are zero

Fisher’s is not significant

df <- df %>%
  mutate(cg = case_when(
    Career.Goal %in% c("Dental Assisting/Dental Hygiene", "Dietician/Nutrition", 
                       "EMS/EMT/Paramedic", "Kinesiology/Sports Medicine/Athletic Training", 
                       "Occupational Therapy", "Physical Therapy", "Physician Assistant", 
                       "Radiology", "Respiratory Therapy", "Speech Therapy") ~ "Allied Health",
    Career.Goal == "Nursing" ~ "Nursing",
    Career.Goal %in% c("Public Health", "Dentist", "Medical School", 
                       "Pharmacy Technologist", "Pharmacist") ~ "Other Health",
    Career.Goal %in% c("Veterinary Assistant", "Veterinary Technology") ~ "Veterinary Studies",
    Career.Goal %in% c("Counseling", "Biologist", "Biomedical Researchers", 
                       "Engineering", "Chemistry", "Psychology") ~ "STEM",
    TRUE ~ "Other"  # If there are any other values not specified in the rules
  ))
# table(df$trajectory, df$cg)
# table(df$altid, df$cg)
carid <- as.data.frame.matrix(table(df$altid, df$cg))
# carid %>%
#   mutate(multiple_categories = ifelse(rowSums(select(., `Allied Health`, Nursing, Other, `Other Health`) > 0) > 1, 1, 0))

carid <- carid %>%
  mutate(max_category = names(carid)[max.col(carid, "first")]) %>%
  mutate(multiple_categories = ifelse(rowSums(select(., `Allied Health`, Nursing, Other, `Other Health`) > 0) > 1, 1, 0))

carid$max_category[carid$multiple_categories == 1] <- "Multiple"
carid$altid <- rownames(carid)
carid <- subset(carid, select=c(altid,max_category))
colnames(carid) <- c("altid","cg2")

df <- df %>%
  left_join(carid, by = "altid")

# Create a summary table with counts and percentages
summary_table <- df %>%
  group_by(trajectory, cg2) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  group_by(trajectory) %>%
  mutate(percentage = round(count / sum(count) * 100)) %>%
  ungroup()

# Pivot the table to a wide format
summary_table_wide <- summary_table %>%
  pivot_wider(names_from = cg2, values_from = c(count, percentage), 
              names_glue = "{cg2}_{.value}")

# Calculate the overall totals and percentages for each trajectory
totals <- df %>%
  group_by(trajectory) %>%
  summarise(Total = n())

# Merge the totals back into the summary table
summary_table_wide <- summary_table_wide %>%
  left_join(totals, by = "trajectory")

# Fill NA with 0 for counts and percentages
summary_table_wide[is.na(summary_table_wide)] <- 0

# Format percentage columns
summary_table_wide <- summary_table_wide %>%
  mutate(across(ends_with("percentage"), ~ paste0(formatC(., format = "f", digits = 0), "%")))

# Format the table in APA style using kableExtra
summary_table_wide %>%
  kable(format = "html", digits = 0, align = "c", col.names = c("Trajectory", 
                                                                "Allied Health","Multiple","Nursing","Other Health",
                                                                "Allied Health","Multiple","Nursing","Other Health",
                                                                "Total")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  add_header_above(c(" " = 1, "Count" = 4, "Percent" = 4, " " = 1))
Count
Percent
Trajectory Allied Health Multiple Nursing Other Health Allied Health Multiple Nursing Other Health Total
Class 1, n = 16 22 25 17 4 32% 37% 25% 6% 68
Class 2, n = 28 35 28 47 12 29% 23% 39% 10% 122
Class 3, n = 11 25 10 7 4 54% 22% 15% 9% 46
Class 4, n = 17 21 13 37 0 30% 18% 52% 0% 71
Class 5, n = 13 13 22 19 0 24% 41% 35% 0% 54
Class 6, n = 2 4 3 0 0 57% 43% 0% 0% 7
Class 7, n = 3 0 9 4 0 0% 69% 31% 0% 13
Class 8, n = 3 9 0 6 0 60% 0% 40% 0% 15
df2 <- unique(subset(df, select=c(altid,trajectory,cg2)))

xtab <- table(df2$trajectory, df2$cg2)
fisher.test(xtab, workspace = 1e+09)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  xtab
## p-value = 0.7537
## alternative hypothesis: two.sided
# fisher.multcomp(xtab)

Current Employment

Using Fisher’s Exact Test, since some cell sizes are zero

Fisher’s is not significant

table(df$CurrentlyEmployed)
## 
##  No Yes 
## 261 107
# Create a summary table with counts and percentages
summary_table <- df %>%
  group_by(trajectory, CurrentlyEmployed) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  group_by(trajectory) %>%
  mutate(percentage = round(count / sum(count) * 100)) %>%
  ungroup()

# Pivot the table to a wide format
summary_table_wide <- summary_table %>%
  pivot_wider(names_from = CurrentlyEmployed, values_from = c(count, percentage), 
              names_glue = "{CurrentlyEmployed}_{.value}")

# Calculate the overall totals and percentages for each trajectory
totals <- df %>%
  group_by(trajectory) %>%
  summarise(Total = n())

# Merge the totals back into the summary table
summary_table_wide <- summary_table_wide %>%
  left_join(totals, by = "trajectory")

# Fill NA with 0 for counts and percentages
summary_table_wide[is.na(summary_table_wide)] <- 0

# Format percentage columns
summary_table_wide <- summary_table_wide %>%
  mutate(across(ends_with("percentage"), ~ paste0(formatC(., format = "f", digits = 0), "%")))

# Format the table in APA style using kableExtra
summary_table_wide %>%
  kable(format = "html", digits = 0, align = "c", col.names = c("Trajectory", 
                                                                "Not Currently Employed","Currently Employed","NA",
                                                                "Not Currently Employed","Currently Employed","NA",
                                                                "Total")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  add_header_above(c(" " = 1, "Count" = 3, "Percent" = 3, " " = 1))
Count
Percent
Trajectory Not Currently Employed Currently Employed NA Not Currently Employed Currently Employed NA Total
Class 1, n = 16 56 6 6 82% 9% 9% 68
Class 2, n = 28 74 39 9 61% 32% 7% 122
Class 3, n = 11 29 14 3 63% 30% 7% 46
Class 4, n = 17 56 12 3 79% 17% 4% 71
Class 5, n = 13 24 25 5 44% 46% 9% 54
Class 6, n = 2 4 3 0 57% 43% 0% 7
Class 7, n = 3 6 5 2 46% 38% 15% 13
Class 8, n = 3 12 3 0 80% 20% 0% 15
df2 <- unique(subset(df, select=c(altid,trajectory,CurrentlyEmployed)))

xtab <- table(df2$trajectory, df2$CurrentlyEmployed)
fisher.test(xtab, workspace = 1e+09)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  xtab
## p-value = 0.3324
## alternative hypothesis: two.sided
# fisher.multcomp(xtab)

Ethnicity

Using Fisher’s Exact Test, since some cell sizes are zero

Fisher’s is significant (p < .043) but posthocs are not

table(df$Ethnicity, useNA = "always")
## 
##           African American/Black An Ethnicity Which is not Listed 
##                                4                               21 
##              Asian: Asian Indian                   Asian: Chinese 
##                                8                               17 
##                  Asian: Filipinx                    Asian: Korean 
##                               56                               14 
##                Asian: Vietnamese         Latinx: Central American 
##                               75                                9 
##        Latinx: Chicanx / Mexican           Latinx: South American 
##                               96                               10 
##                  White: European             White: Midde Eastern 
##                               75                                4 
##             White: North African                             <NA> 
##                                1                                6
df <- df %>%
  mutate(eth2 = case_when(
    Ethnicity %in% c("African American/Black","Latinx: Central American","Latinx: Chicanx / Mexican","Latinx: South American","An Ethnicity Which is not Listed") ~ "Black, Latinx, Unlisted",
    Ethnicity %in% c("Asian: Asian Indian","Asian: Chinese","Asian: Filipinx","Asian: Korean","Asian: Vietnamese") ~ "Asian",
    Ethnicity %in% c("White: European","White: Midde Eastern","White: North African") ~ "White"
  ))

df2 <- subset(df, !is.na(eth2))

table(df2$Ethnicity, df2$eth2, useNA = "always")
##                                   
##                                    Asian Black, Latinx, Unlisted White <NA>
##   African American/Black               0                       4     0    0
##   An Ethnicity Which is not Listed     0                      21     0    0
##   Asian: Asian Indian                  8                       0     0    0
##   Asian: Chinese                      17                       0     0    0
##   Asian: Filipinx                     56                       0     0    0
##   Asian: Korean                       14                       0     0    0
##   Asian: Vietnamese                   75                       0     0    0
##   Latinx: Central American             0                       9     0    0
##   Latinx: Chicanx / Mexican            0                      96     0    0
##   Latinx: South American               0                      10     0    0
##   White: European                      0                       0    75    0
##   White: Midde Eastern                 0                       0     4    0
##   White: North African                 0                       0     1    0
##   <NA>                                 0                       0     0    0
# Create a summary table with counts and percentages
summary_table <- df2 %>%
  group_by(trajectory, eth2) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  group_by(trajectory) %>%
  mutate(percentage = round(count / sum(count) * 100)) %>%
  ungroup()

# Pivot the table to a wide format
summary_table_wide <- summary_table %>%
  pivot_wider(names_from = eth2, values_from = c(count, percentage), 
              names_glue = "{eth2}_{.value}")

# Calculate the overall totals and percentages for each trajectory
totals <- df2 %>%
  group_by(trajectory) %>%
  summarise(Total = n())

# Merge the totals back into the summary table
summary_table_wide <- summary_table_wide %>%
  left_join(totals, by = "trajectory")

# Fill NA with 0 for counts and percentages
summary_table_wide[is.na(summary_table_wide)] <- 0

# Format percentage columns
summary_table_wide <- summary_table_wide %>%
  mutate(across(ends_with("percentage"), ~ paste0(formatC(., format = "f", digits = 0), "%")))

# Format the table in APA style using kableExtra
summary_table_wide %>%
  kable(format = "html", digits = 0, align = "c", col.names = c("Trajectory", 
                                                                "Asian","Black & Latinx","White",
                                                                "Asian","Black & Latinx","White",
                                                                "Total")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  add_header_above(c(" " = 1, "Count" = 3, "Percent" = 3, " " = 1))
Count
Percent
Trajectory Asian Black & Latinx White Asian Black & Latinx White Total
Class 1, n = 16 33 14 18 51% 22% 28% 65
Class 2, n = 28 63 55 3 52% 45% 2% 121
Class 3, n = 11 15 20 11 33% 43% 24% 46
Class 4, n = 17 26 18 27 37% 25% 38% 71
Class 5, n = 13 12 20 21 23% 38% 40% 53
Class 6, n = 2 4 2 0 67% 33% 0% 6
Class 7, n = 3 13 0 0 100% 0% 0% 13
Class 8, n = 3 4 11 0 27% 73% 0% 15
df3 <- unique(subset(df2, select=c(altid,trajectory,eth2)))

xtab <- table(df3$trajectory, df3$eth2)
fisher.test(xtab, simulate.p.value=TRUE)
## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on
##  2000 replicates)
## 
## data:  xtab
## p-value = 0.03398
## alternative hypothesis: two.sided
fisher.multcomp(xtab)
## 
##         Pairwise comparisons using Fisher's exact test for count data
## 
## data:  xtab
## 
##                                 Asian:Black, Latinx, Unlisted Asian:White
## Class 1, n = 16:Class 2, n = 28                             1      0.6761
## Class 1, n = 16:Class 3, n = 11                             1      1.0000
## Class 1, n = 16:Class 4, n = 17                             1      1.0000
## Class 1, n = 16:Class 5, n = 13                             1      1.0000
## Class 1, n = 16:Class 6, n = 2                              1      1.0000
## Class 1, n = 16:Class 7, n = 3                              1      1.0000
## Class 1, n = 16:Class 8, n = 3                              1      1.0000
## Class 2, n = 28:Class 3, n = 11                             1      0.5613
## Class 2, n = 28:Class 4, n = 17                             1      0.2620
## Class 2, n = 28:Class 5, n = 13                             1      0.1870
## Class 2, n = 28:Class 6, n = 2                              1      1.0000
## Class 2, n = 28:Class 7, n = 3                              1      1.0000
## Class 2, n = 28:Class 8, n = 3                              1      1.0000
## Class 3, n = 11:Class 4, n = 17                             1      1.0000
## Class 3, n = 11:Class 5, n = 13                             1      1.0000
## Class 3, n = 11:Class 6, n = 2                              1      1.0000
## Class 3, n = 11:Class 7, n = 3                              1      1.0000
## Class 3, n = 11:Class 8, n = 3                              1      1.0000
## Class 4, n = 17:Class 5, n = 13                             1      1.0000
## Class 4, n = 17:Class 6, n = 2                              1      1.0000
## Class 4, n = 17:Class 7, n = 3                              1      1.0000
## Class 4, n = 17:Class 8, n = 3                              1      1.0000
## Class 5, n = 13:Class 6, n = 2                              1      1.0000
## Class 5, n = 13:Class 7, n = 3                              1      1.0000
## Class 5, n = 13:Class 8, n = 3                              1      1.0000
## Class 6, n = 2:Class 7, n = 3                               1      1.0000
## Class 6, n = 2:Class 8, n = 3                               1      1.0000
## Class 7, n = 3:Class 8, n = 3                               1      1.0000
##                                 Black, Latinx, Unlisted:White
## Class 1, n = 16:Class 2, n = 28                        0.3575
## Class 1, n = 16:Class 3, n = 11                        1.0000
## Class 1, n = 16:Class 4, n = 17                        1.0000
## Class 1, n = 16:Class 5, n = 13                        1.0000
## Class 1, n = 16:Class 6, n = 2                         1.0000
## Class 1, n = 16:Class 7, n = 3                         1.0000
## Class 1, n = 16:Class 8, n = 3                         1.0000
## Class 2, n = 28:Class 3, n = 11                        1.0000
## Class 2, n = 28:Class 4, n = 17                        0.2620
## Class 2, n = 28:Class 5, n = 13                        0.3575
## Class 2, n = 28:Class 6, n = 2                         1.0000
## Class 2, n = 28:Class 7, n = 3                         1.0000
## Class 2, n = 28:Class 8, n = 3                         1.0000
## Class 3, n = 11:Class 4, n = 17                        1.0000
## Class 3, n = 11:Class 5, n = 13                        1.0000
## Class 3, n = 11:Class 6, n = 2                         1.0000
## Class 3, n = 11:Class 7, n = 3                         1.0000
## Class 3, n = 11:Class 8, n = 3                         1.0000
## Class 4, n = 17:Class 5, n = 13                        1.0000
## Class 4, n = 17:Class 6, n = 2                         1.0000
## Class 4, n = 17:Class 7, n = 3                         1.0000
## Class 4, n = 17:Class 8, n = 3                         1.0000
## Class 5, n = 13:Class 6, n = 2                         1.0000
## Class 5, n = 13:Class 7, n = 3                         1.0000
## Class 5, n = 13:Class 8, n = 3                         1.0000
## Class 6, n = 2:Class 7, n = 3                          1.0000
## Class 6, n = 2:Class 8, n = 3                          1.0000
## Class 7, n = 3:Class 8, n = 3                          1.0000
## 
## P value adjustment method: fdr

Gender

Using Fisher’s Exact Test, since some cell sizes are zero

Fisher’s is not significant

table(df$Gender)
## 
##   B   F   M 
##   4 298  94
df2 <- subset(df, Gender != "B")

# Create a summary table with counts and percentages
summary_table <- df2 %>%
  group_by(trajectory, Gender) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  group_by(trajectory) %>%
  mutate(percentage = round(count / sum(count) * 100)) %>%
  ungroup()

# Pivot the table to a wide format
summary_table_wide <- summary_table %>%
  pivot_wider(names_from = Gender, values_from = c(count, percentage), 
              names_glue = "{Gender}_{.value}")

# Calculate the overall totals and percentages for each trajectory
totals <- df %>%
  group_by(trajectory) %>%
  summarise(Total = n())

# Merge the totals back into the summary table
summary_table_wide <- summary_table_wide %>%
  left_join(totals, by = "trajectory")

# Fill NA with 0 for counts and percentages
summary_table_wide[is.na(summary_table_wide)] <- 0

# Format percentage columns
summary_table_wide <- summary_table_wide %>%
  mutate(across(ends_with("percentage"), ~ paste0(formatC(., format = "f", digits = 0), "%")))

# Format the table in APA style using kableExtra
summary_table_wide %>%
  kable(format = "html", digits = 0, align = "c", col.names = c("Trajectory", 
                                                                "Men","Women",
                                                                "Men","Women",
                                                                "Total")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  add_header_above(c(" " = 1, "Count" = 2, "Percent" = 2, " " = 1))
Count
Percent
Trajectory Men Women Men Women Total
Class 1, n = 16 63 5 93% 7% 68
Class 2, n = 28 99 23 81% 19% 122
Class 3, n = 11 23 19 55% 45% 46
Class 4, n = 17 45 26 63% 37% 71
Class 5, n = 13 36 18 67% 33% 54
Class 6, n = 2 4 3 57% 43% 7
Class 7, n = 3 13 0 100% 0% 13
Class 8, n = 3 15 0 100% 0% 15
df3 <- unique(subset(df, select=c(altid,trajectory,Gender)))

xtab <- table(df3$trajectory, df3$Gender)
fisher.test(xtab, workspace = 1e+09)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  xtab
## p-value = 0.1238
## alternative hypothesis: two.sided
# fisher.multcomp(xtab)