library(dplyr)
library(RVAideMemoire)
library(ggplot2)
library(tidyr)
library(kableExtra)
library(formattable)
df <- read.csv(file="new_data/export.csv", header=T)
traj <- read.csv(file="df_8.csv", header=T)
df$trajectory_bu <- df$trajectory
df$trajectory[df$trajectory_bu == "1"] <- "Class 3, n = 11"
df$trajectory[df$trajectory_bu == "2"] <- "Class 6, n = 2"
df$trajectory[df$trajectory_bu == "3"] <- "Class 5, n = 13"
df$trajectory[df$trajectory_bu == "4"] <- "Class 2, n = 28"
df$trajectory[df$trajectory_bu == "5"] <- "Class 4, n = 17"
df$trajectory[df$trajectory_bu == "6"] <- "Class 1, n = 16"
df$trajectory[df$trajectory_bu == "7"] <- "Class 8, n = 3"
df$trajectory[df$trajectory_bu == "8"] <- "Class 7, n = 3"
df$trajectory <- factor(df$trajectory, levels = c("Class 1, n = 16",
"Class 2, n = 28",
"Class 3, n = 11",
"Class 4, n = 17",
"Class 5, n = 13",
"Class 6, n = 2",
"Class 7, n = 3",
"Class 8, n = 3"))
Using Fisher’s Exact Test, since some cell sizes are zero
Fisher’s is significant (p = .050) but posthocs are not
grades <- subset(df, select=c(altid, BIOL_Grade))
ids <- unique(grades$altid)
out <- data.frame()
n <- 1
for (i in 1:93) {
id <- ids[n]
temp <- subset(grades, altid == id)
if (all(temp$BIOL_Grade %in% c("A", "A+", "A-"))) {
out <- rbind(out, data.frame(altid = id, status = "high"))
} else if (any(temp$BIOL_Grade %in% c("C", "C+", "D", "F", "NP"))) {
out <- rbind(out, data.frame(altid = id, status = "low"))
} else if (any(temp$BIOL_Grade %in% c("B", "B-", "B+"))) {
out <- rbind(out, data.frame(altid = id, status = "mid"))
}
n <- n + 1
}
df2 <- unique(subset(df, select=c(altid, trajectory)))
df3 <- merge(out, df2, by = "altid")
table(df3$status)
##
## high low mid
## 50 15 28
table(df3$trajectory, df3$status)
##
## high low mid
## Class 1, n = 16 4 5 7
## Class 2, n = 28 13 5 10
## Class 3, n = 11 4 3 4
## Class 4, n = 17 12 1 4
## Class 5, n = 13 12 0 1
## Class 6, n = 2 1 0 1
## Class 7, n = 3 2 0 1
## Class 8, n = 3 2 1 0
# Perform Fisher's Exact Test
# Increase workspace size
xtab <- table(df3$trajectory, df3$status)
fisher.test(xtab, workspace = 1e+09)
##
## Fisher's Exact Test for Count Data
##
## data: xtab
## p-value = 0.04961
## alternative hypothesis: two.sided
fisher.multcomp(xtab)
##
## Pairwise comparisons using Fisher's exact test for count data
##
## data: xtab
##
## high:low high:mid low:mid
## Class 1, n = 16:Class 2, n = 28 1.0000 1.0000 1
## Class 1, n = 16:Class 3, n = 11 1.0000 1.0000 1
## Class 1, n = 16:Class 4, n = 17 0.6068 0.6897 1
## Class 1, n = 16:Class 5, n = 13 0.3279 0.3279 1
## Class 1, n = 16:Class 6, n = 2 1.0000 1.0000 1
## Class 1, n = 16:Class 7, n = 3 1.0000 1.0000 1
## Class 1, n = 16:Class 8, n = 3 1.0000 1.0000 1
## Class 2, n = 28:Class 3, n = 11 1.0000 1.0000 1
## Class 2, n = 28:Class 4, n = 17 1.0000 1.0000 1
## Class 2, n = 28:Class 5, n = 13 0.6897 0.6068 1
## Class 2, n = 28:Class 6, n = 2 1.0000 1.0000 1
## Class 2, n = 28:Class 7, n = 3 1.0000 1.0000 1
## Class 2, n = 28:Class 8, n = 3 1.0000 1.0000 1
## Class 3, n = 11:Class 4, n = 17 0.9439 1.0000 1
## Class 3, n = 11:Class 5, n = 13 0.6068 0.6646 1
## Class 3, n = 11:Class 6, n = 2 1.0000 1.0000 1
## Class 3, n = 11:Class 7, n = 3 1.0000 1.0000 1
## Class 3, n = 11:Class 8, n = 3 1.0000 1.0000 1
## Class 4, n = 17:Class 5, n = 13 1.0000 1.0000 1
## Class 4, n = 17:Class 6, n = 2 1.0000 1.0000 1
## Class 4, n = 17:Class 7, n = 3 1.0000 1.0000 1
## Class 4, n = 17:Class 8, n = 3 1.0000 1.0000 1
## Class 5, n = 13:Class 6, n = 2 1.0000 1.0000 1
## Class 5, n = 13:Class 7, n = 3 1.0000 1.0000 1
## Class 5, n = 13:Class 8, n = 3 1.0000 1.0000 1
## Class 6, n = 2:Class 7, n = 3 1.0000 1.0000 1
## Class 6, n = 2:Class 8, n = 3 1.0000 1.0000 1
## Class 7, n = 3:Class 8, n = 3 1.0000 1.0000 1
##
## P value adjustment method: fdr
convert_grade <- function(grade) {
case_when(
grade %in% c("A+", "A") ~ 4,
grade == "A-" ~ 3.7,
grade == "B+" ~ 3.3,
grade == "B" ~ 3,
grade == "B-" ~ 2.7,
grade == "C+" ~ 2.3,
grade == "C" ~ 2,
grade == "C-" ~ 1.7,
grade == "D+" ~ 1.3,
grade == "D" ~ 1,
grade == "D-" ~ 0.7,
grade == "F" ~ 0,
grade == "NP" ~ 0,
TRUE ~ NA_real_ # Handle any other cases (shouldn't happen with this setup)
)
}
# Apply the function to the BIOL_Grade column
df <- df %>%
mutate(grade1 = convert_grade(BIOL_Grade))
df$altid <- as.factor(df$altid)
df$trajectory <- as.factor(df$trajectory)
df$BIOL_Crse <- factor(df$BIOL_Crse, levels = c("F040A", "F040B", "F040C"))
tempdf <- unique(subset(df, select=c(altid, trajectory, BIOL_Crse, grade1)))
tempdf_filtered <- tempdf %>%
group_by(altid) %>%
filter(n() <= 3) # Keep participants who have three or fewer rows
tempdf_filtered <- subset(tempdf_filtered, altid != 982)
ggplot(tempdf_filtered, aes(x = BIOL_Crse, y = grade1, group = altid)) +
geom_line(size = .5, alpha = .25) + # Adjust line size if necessary
labs(title = "Student Grades by Class and Trajectory",
x = "Class", y = "Grade") +
scale_color_discrete(name = "Trajectory") + # Customize legend title
facet_wrap(~trajectory) +
theme_minimal() # Optional: Use a minimal theme
# overlay trajectories over grade plots?
summary_table <- traj %>%
group_by(classn, courseid) %>%
summarize(avg_std_id = mean(std_id, na.rm = TRUE)) %>%
ungroup()
tempdf_filtered$grade2 <- scale(tempdf_filtered$grade1, center=T, scale=T)
summary_table$BIOL_Crse <- NA
summary_table$BIOL_Crse[summary_table$courseid == "1 40A Pre"] <- "F040A"
summary_table$BIOL_Crse[summary_table$courseid == "3 mid"] <- "F040B"
summary_table$BIOL_Crse[summary_table$courseid == "6 40C Post"] <- "F040C"
summary_table <- subset(summary_table, select=-c(courseid))
summary_table$classn <- as.factor(summary_table$classn)
merged_df <- tempdf_filtered %>%
left_join(summary_table, by = c("trajectory" = "classn", "BIOL_Crse" = "BIOL_Crse"))
ggplot(merged_df) +
geom_line(size = .5, alpha = .25, aes(x = BIOL_Crse, y = grade2, group = altid)) +
geom_line(aes(x = BIOL_Crse, y = avg_std_id, group = trajectory), size = 1, color = "red", alpha = 1) +
labs(title = "Standardized Student Grades by Class and Trajectory with ID Overlaid",
x = "Class", y = "Grade") + ylim(-3,2) +
scale_color_discrete(name = "Trajectory") + # Customize legend title
facet_wrap(~trajectory) +
theme_minimal() # Optional: Use a minimal theme
Using Fisher’s Exact Test, since some cell sizes are zero
Fisher’s is not significant
df <- df %>%
mutate(cg = case_when(
Career.Goal %in% c("Dental Assisting/Dental Hygiene", "Dietician/Nutrition",
"EMS/EMT/Paramedic", "Kinesiology/Sports Medicine/Athletic Training",
"Occupational Therapy", "Physical Therapy", "Physician Assistant",
"Radiology", "Respiratory Therapy", "Speech Therapy") ~ "Allied Health",
Career.Goal == "Nursing" ~ "Nursing",
Career.Goal %in% c("Public Health", "Dentist", "Medical School",
"Pharmacy Technologist", "Pharmacist") ~ "Other Health",
Career.Goal %in% c("Veterinary Assistant", "Veterinary Technology") ~ "Veterinary Studies",
Career.Goal %in% c("Counseling", "Biologist", "Biomedical Researchers",
"Engineering", "Chemistry", "Psychology") ~ "STEM",
TRUE ~ "Other" # If there are any other values not specified in the rules
))
# table(df$trajectory, df$cg)
# table(df$altid, df$cg)
carid <- as.data.frame.matrix(table(df$altid, df$cg))
# carid %>%
# mutate(multiple_categories = ifelse(rowSums(select(., `Allied Health`, Nursing, Other, `Other Health`) > 0) > 1, 1, 0))
carid <- carid %>%
mutate(max_category = names(carid)[max.col(carid, "first")]) %>%
mutate(multiple_categories = ifelse(rowSums(select(., `Allied Health`, Nursing, Other, `Other Health`) > 0) > 1, 1, 0))
carid$max_category[carid$multiple_categories == 1] <- "Multiple"
carid$altid <- rownames(carid)
carid <- subset(carid, select=c(altid,max_category))
colnames(carid) <- c("altid","cg2")
df <- df %>%
left_join(carid, by = "altid")
# Create a summary table with counts and percentages
summary_table <- df %>%
group_by(trajectory, cg2) %>%
summarise(count = n()) %>%
ungroup() %>%
group_by(trajectory) %>%
mutate(percentage = round(count / sum(count) * 100)) %>%
ungroup()
# Pivot the table to a wide format
summary_table_wide <- summary_table %>%
pivot_wider(names_from = cg2, values_from = c(count, percentage),
names_glue = "{cg2}_{.value}")
# Calculate the overall totals and percentages for each trajectory
totals <- df %>%
group_by(trajectory) %>%
summarise(Total = n())
# Merge the totals back into the summary table
summary_table_wide <- summary_table_wide %>%
left_join(totals, by = "trajectory")
# Fill NA with 0 for counts and percentages
summary_table_wide[is.na(summary_table_wide)] <- 0
# Format percentage columns
summary_table_wide <- summary_table_wide %>%
mutate(across(ends_with("percentage"), ~ paste0(formatC(., format = "f", digits = 0), "%")))
# Format the table in APA style using kableExtra
summary_table_wide %>%
kable(format = "html", digits = 0, align = "c", col.names = c("Trajectory",
"Allied Health","Multiple","Nursing","Other Health",
"Allied Health","Multiple","Nursing","Other Health",
"Total")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
add_header_above(c(" " = 1, "Count" = 4, "Percent" = 4, " " = 1))
| Trajectory | Allied Health | Multiple | Nursing | Other Health | Allied Health | Multiple | Nursing | Other Health | Total |
|---|---|---|---|---|---|---|---|---|---|
| Class 1, n = 16 | 22 | 25 | 17 | 4 | 32% | 37% | 25% | 6% | 68 |
| Class 2, n = 28 | 35 | 28 | 47 | 12 | 29% | 23% | 39% | 10% | 122 |
| Class 3, n = 11 | 25 | 10 | 7 | 4 | 54% | 22% | 15% | 9% | 46 |
| Class 4, n = 17 | 21 | 13 | 37 | 0 | 30% | 18% | 52% | 0% | 71 |
| Class 5, n = 13 | 13 | 22 | 19 | 0 | 24% | 41% | 35% | 0% | 54 |
| Class 6, n = 2 | 4 | 3 | 0 | 0 | 57% | 43% | 0% | 0% | 7 |
| Class 7, n = 3 | 0 | 9 | 4 | 0 | 0% | 69% | 31% | 0% | 13 |
| Class 8, n = 3 | 9 | 0 | 6 | 0 | 60% | 0% | 40% | 0% | 15 |
df2 <- unique(subset(df, select=c(altid,trajectory,cg2)))
xtab <- table(df2$trajectory, df2$cg2)
fisher.test(xtab, workspace = 1e+09)
##
## Fisher's Exact Test for Count Data
##
## data: xtab
## p-value = 0.7537
## alternative hypothesis: two.sided
# fisher.multcomp(xtab)
Using Fisher’s Exact Test, since some cell sizes are zero
Fisher’s is not significant
table(df$CurrentlyEmployed)
##
## No Yes
## 261 107
# Create a summary table with counts and percentages
summary_table <- df %>%
group_by(trajectory, CurrentlyEmployed) %>%
summarise(count = n()) %>%
ungroup() %>%
group_by(trajectory) %>%
mutate(percentage = round(count / sum(count) * 100)) %>%
ungroup()
# Pivot the table to a wide format
summary_table_wide <- summary_table %>%
pivot_wider(names_from = CurrentlyEmployed, values_from = c(count, percentage),
names_glue = "{CurrentlyEmployed}_{.value}")
# Calculate the overall totals and percentages for each trajectory
totals <- df %>%
group_by(trajectory) %>%
summarise(Total = n())
# Merge the totals back into the summary table
summary_table_wide <- summary_table_wide %>%
left_join(totals, by = "trajectory")
# Fill NA with 0 for counts and percentages
summary_table_wide[is.na(summary_table_wide)] <- 0
# Format percentage columns
summary_table_wide <- summary_table_wide %>%
mutate(across(ends_with("percentage"), ~ paste0(formatC(., format = "f", digits = 0), "%")))
# Format the table in APA style using kableExtra
summary_table_wide %>%
kable(format = "html", digits = 0, align = "c", col.names = c("Trajectory",
"Not Currently Employed","Currently Employed","NA",
"Not Currently Employed","Currently Employed","NA",
"Total")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
add_header_above(c(" " = 1, "Count" = 3, "Percent" = 3, " " = 1))
| Trajectory | Not Currently Employed | Currently Employed | NA | Not Currently Employed | Currently Employed | NA | Total |
|---|---|---|---|---|---|---|---|
| Class 1, n = 16 | 56 | 6 | 6 | 82% | 9% | 9% | 68 |
| Class 2, n = 28 | 74 | 39 | 9 | 61% | 32% | 7% | 122 |
| Class 3, n = 11 | 29 | 14 | 3 | 63% | 30% | 7% | 46 |
| Class 4, n = 17 | 56 | 12 | 3 | 79% | 17% | 4% | 71 |
| Class 5, n = 13 | 24 | 25 | 5 | 44% | 46% | 9% | 54 |
| Class 6, n = 2 | 4 | 3 | 0 | 57% | 43% | 0% | 7 |
| Class 7, n = 3 | 6 | 5 | 2 | 46% | 38% | 15% | 13 |
| Class 8, n = 3 | 12 | 3 | 0 | 80% | 20% | 0% | 15 |
df2 <- unique(subset(df, select=c(altid,trajectory,CurrentlyEmployed)))
xtab <- table(df2$trajectory, df2$CurrentlyEmployed)
fisher.test(xtab, workspace = 1e+09)
##
## Fisher's Exact Test for Count Data
##
## data: xtab
## p-value = 0.3324
## alternative hypothesis: two.sided
# fisher.multcomp(xtab)
Using Fisher’s Exact Test, since some cell sizes are zero
Fisher’s is significant (p < .043) but posthocs are not
table(df$Ethnicity, useNA = "always")
##
## African American/Black An Ethnicity Which is not Listed
## 4 21
## Asian: Asian Indian Asian: Chinese
## 8 17
## Asian: Filipinx Asian: Korean
## 56 14
## Asian: Vietnamese Latinx: Central American
## 75 9
## Latinx: Chicanx / Mexican Latinx: South American
## 96 10
## White: European White: Midde Eastern
## 75 4
## White: North African <NA>
## 1 6
df <- df %>%
mutate(eth2 = case_when(
Ethnicity %in% c("African American/Black","Latinx: Central American","Latinx: Chicanx / Mexican","Latinx: South American","An Ethnicity Which is not Listed") ~ "Black, Latinx, Unlisted",
Ethnicity %in% c("Asian: Asian Indian","Asian: Chinese","Asian: Filipinx","Asian: Korean","Asian: Vietnamese") ~ "Asian",
Ethnicity %in% c("White: European","White: Midde Eastern","White: North African") ~ "White"
))
df2 <- subset(df, !is.na(eth2))
table(df2$Ethnicity, df2$eth2, useNA = "always")
##
## Asian Black, Latinx, Unlisted White <NA>
## African American/Black 0 4 0 0
## An Ethnicity Which is not Listed 0 21 0 0
## Asian: Asian Indian 8 0 0 0
## Asian: Chinese 17 0 0 0
## Asian: Filipinx 56 0 0 0
## Asian: Korean 14 0 0 0
## Asian: Vietnamese 75 0 0 0
## Latinx: Central American 0 9 0 0
## Latinx: Chicanx / Mexican 0 96 0 0
## Latinx: South American 0 10 0 0
## White: European 0 0 75 0
## White: Midde Eastern 0 0 4 0
## White: North African 0 0 1 0
## <NA> 0 0 0 0
# Create a summary table with counts and percentages
summary_table <- df2 %>%
group_by(trajectory, eth2) %>%
summarise(count = n()) %>%
ungroup() %>%
group_by(trajectory) %>%
mutate(percentage = round(count / sum(count) * 100)) %>%
ungroup()
# Pivot the table to a wide format
summary_table_wide <- summary_table %>%
pivot_wider(names_from = eth2, values_from = c(count, percentage),
names_glue = "{eth2}_{.value}")
# Calculate the overall totals and percentages for each trajectory
totals <- df2 %>%
group_by(trajectory) %>%
summarise(Total = n())
# Merge the totals back into the summary table
summary_table_wide <- summary_table_wide %>%
left_join(totals, by = "trajectory")
# Fill NA with 0 for counts and percentages
summary_table_wide[is.na(summary_table_wide)] <- 0
# Format percentage columns
summary_table_wide <- summary_table_wide %>%
mutate(across(ends_with("percentage"), ~ paste0(formatC(., format = "f", digits = 0), "%")))
# Format the table in APA style using kableExtra
summary_table_wide %>%
kable(format = "html", digits = 0, align = "c", col.names = c("Trajectory",
"Asian","Black & Latinx","White",
"Asian","Black & Latinx","White",
"Total")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
add_header_above(c(" " = 1, "Count" = 3, "Percent" = 3, " " = 1))
| Trajectory | Asian | Black & Latinx | White | Asian | Black & Latinx | White | Total |
|---|---|---|---|---|---|---|---|
| Class 1, n = 16 | 33 | 14 | 18 | 51% | 22% | 28% | 65 |
| Class 2, n = 28 | 63 | 55 | 3 | 52% | 45% | 2% | 121 |
| Class 3, n = 11 | 15 | 20 | 11 | 33% | 43% | 24% | 46 |
| Class 4, n = 17 | 26 | 18 | 27 | 37% | 25% | 38% | 71 |
| Class 5, n = 13 | 12 | 20 | 21 | 23% | 38% | 40% | 53 |
| Class 6, n = 2 | 4 | 2 | 0 | 67% | 33% | 0% | 6 |
| Class 7, n = 3 | 13 | 0 | 0 | 100% | 0% | 0% | 13 |
| Class 8, n = 3 | 4 | 11 | 0 | 27% | 73% | 0% | 15 |
df3 <- unique(subset(df2, select=c(altid,trajectory,eth2)))
xtab <- table(df3$trajectory, df3$eth2)
fisher.test(xtab, simulate.p.value=TRUE)
##
## Fisher's Exact Test for Count Data with simulated p-value (based on
## 2000 replicates)
##
## data: xtab
## p-value = 0.03398
## alternative hypothesis: two.sided
fisher.multcomp(xtab)
##
## Pairwise comparisons using Fisher's exact test for count data
##
## data: xtab
##
## Asian:Black, Latinx, Unlisted Asian:White
## Class 1, n = 16:Class 2, n = 28 1 0.6761
## Class 1, n = 16:Class 3, n = 11 1 1.0000
## Class 1, n = 16:Class 4, n = 17 1 1.0000
## Class 1, n = 16:Class 5, n = 13 1 1.0000
## Class 1, n = 16:Class 6, n = 2 1 1.0000
## Class 1, n = 16:Class 7, n = 3 1 1.0000
## Class 1, n = 16:Class 8, n = 3 1 1.0000
## Class 2, n = 28:Class 3, n = 11 1 0.5613
## Class 2, n = 28:Class 4, n = 17 1 0.2620
## Class 2, n = 28:Class 5, n = 13 1 0.1870
## Class 2, n = 28:Class 6, n = 2 1 1.0000
## Class 2, n = 28:Class 7, n = 3 1 1.0000
## Class 2, n = 28:Class 8, n = 3 1 1.0000
## Class 3, n = 11:Class 4, n = 17 1 1.0000
## Class 3, n = 11:Class 5, n = 13 1 1.0000
## Class 3, n = 11:Class 6, n = 2 1 1.0000
## Class 3, n = 11:Class 7, n = 3 1 1.0000
## Class 3, n = 11:Class 8, n = 3 1 1.0000
## Class 4, n = 17:Class 5, n = 13 1 1.0000
## Class 4, n = 17:Class 6, n = 2 1 1.0000
## Class 4, n = 17:Class 7, n = 3 1 1.0000
## Class 4, n = 17:Class 8, n = 3 1 1.0000
## Class 5, n = 13:Class 6, n = 2 1 1.0000
## Class 5, n = 13:Class 7, n = 3 1 1.0000
## Class 5, n = 13:Class 8, n = 3 1 1.0000
## Class 6, n = 2:Class 7, n = 3 1 1.0000
## Class 6, n = 2:Class 8, n = 3 1 1.0000
## Class 7, n = 3:Class 8, n = 3 1 1.0000
## Black, Latinx, Unlisted:White
## Class 1, n = 16:Class 2, n = 28 0.3575
## Class 1, n = 16:Class 3, n = 11 1.0000
## Class 1, n = 16:Class 4, n = 17 1.0000
## Class 1, n = 16:Class 5, n = 13 1.0000
## Class 1, n = 16:Class 6, n = 2 1.0000
## Class 1, n = 16:Class 7, n = 3 1.0000
## Class 1, n = 16:Class 8, n = 3 1.0000
## Class 2, n = 28:Class 3, n = 11 1.0000
## Class 2, n = 28:Class 4, n = 17 0.2620
## Class 2, n = 28:Class 5, n = 13 0.3575
## Class 2, n = 28:Class 6, n = 2 1.0000
## Class 2, n = 28:Class 7, n = 3 1.0000
## Class 2, n = 28:Class 8, n = 3 1.0000
## Class 3, n = 11:Class 4, n = 17 1.0000
## Class 3, n = 11:Class 5, n = 13 1.0000
## Class 3, n = 11:Class 6, n = 2 1.0000
## Class 3, n = 11:Class 7, n = 3 1.0000
## Class 3, n = 11:Class 8, n = 3 1.0000
## Class 4, n = 17:Class 5, n = 13 1.0000
## Class 4, n = 17:Class 6, n = 2 1.0000
## Class 4, n = 17:Class 7, n = 3 1.0000
## Class 4, n = 17:Class 8, n = 3 1.0000
## Class 5, n = 13:Class 6, n = 2 1.0000
## Class 5, n = 13:Class 7, n = 3 1.0000
## Class 5, n = 13:Class 8, n = 3 1.0000
## Class 6, n = 2:Class 7, n = 3 1.0000
## Class 6, n = 2:Class 8, n = 3 1.0000
## Class 7, n = 3:Class 8, n = 3 1.0000
##
## P value adjustment method: fdr
Using Fisher’s Exact Test, since some cell sizes are zero
Fisher’s is not significant
table(df$Gender)
##
## B F M
## 4 298 94
df2 <- subset(df, Gender != "B")
# Create a summary table with counts and percentages
summary_table <- df2 %>%
group_by(trajectory, Gender) %>%
summarise(count = n()) %>%
ungroup() %>%
group_by(trajectory) %>%
mutate(percentage = round(count / sum(count) * 100)) %>%
ungroup()
# Pivot the table to a wide format
summary_table_wide <- summary_table %>%
pivot_wider(names_from = Gender, values_from = c(count, percentage),
names_glue = "{Gender}_{.value}")
# Calculate the overall totals and percentages for each trajectory
totals <- df %>%
group_by(trajectory) %>%
summarise(Total = n())
# Merge the totals back into the summary table
summary_table_wide <- summary_table_wide %>%
left_join(totals, by = "trajectory")
# Fill NA with 0 for counts and percentages
summary_table_wide[is.na(summary_table_wide)] <- 0
# Format percentage columns
summary_table_wide <- summary_table_wide %>%
mutate(across(ends_with("percentage"), ~ paste0(formatC(., format = "f", digits = 0), "%")))
# Format the table in APA style using kableExtra
summary_table_wide %>%
kable(format = "html", digits = 0, align = "c", col.names = c("Trajectory",
"Men","Women",
"Men","Women",
"Total")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
add_header_above(c(" " = 1, "Count" = 2, "Percent" = 2, " " = 1))
| Trajectory | Men | Women | Men | Women | Total |
|---|---|---|---|---|---|
| Class 1, n = 16 | 63 | 5 | 93% | 7% | 68 |
| Class 2, n = 28 | 99 | 23 | 81% | 19% | 122 |
| Class 3, n = 11 | 23 | 19 | 55% | 45% | 46 |
| Class 4, n = 17 | 45 | 26 | 63% | 37% | 71 |
| Class 5, n = 13 | 36 | 18 | 67% | 33% | 54 |
| Class 6, n = 2 | 4 | 3 | 57% | 43% | 7 |
| Class 7, n = 3 | 13 | 0 | 100% | 0% | 13 |
| Class 8, n = 3 | 15 | 0 | 100% | 0% | 15 |
df3 <- unique(subset(df, select=c(altid,trajectory,Gender)))
xtab <- table(df3$trajectory, df3$Gender)
fisher.test(xtab, workspace = 1e+09)
##
## Fisher's Exact Test for Count Data
##
## data: xtab
## p-value = 0.1238
## alternative hypothesis: two.sided
# fisher.multcomp(xtab)