Crosstab of content type by gender
Performance posts made up 69% of posts by males, compared to only 37%
of posts by females. Thus, males were more likely than females to post
performance content. Among females, nearly half (47.9%) of posts were
endorsement posts. By contrast, endorsement posts made up only 16.7%
of posts by males. So, females were more likely than males to
post endorsement content. A chi-square test found the difference
statistically significant (χ² = 24.38, df = 2, p < .001).
# ------------------------------
# Setup: Install (if missing) and load packages
# ------------------------------
# requireNamespace() only checks whether a package is installed: unlike
# require(), it does not attach the package as a side effect and, with
# quietly = TRUE, does not warn when the package is missing. Attaching is done
# once, explicitly, by the library() calls below, which stop with an error if
# an install failed.
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse") # Data wrangling & plotting
if (!requireNamespace("gmodels", quietly = TRUE)) install.packages("gmodels") # Crosstabs
if (!requireNamespace("gt", quietly = TRUE)) install.packages("gt") # Table formatting
library(tidyverse)
library(gmodels)
library(gt)
# ------------------------------
# Load the data
# ------------------------------
# Replace "NewData.csv" with your dataset's file name. The path is resolved
# relative to the current working directory.
mydata <- read.csv("NewData.csv") #Edit
################################################
# Content type by gender
################################################
# ------------------------------
# Define Dependent (DV) and Independent (IV) variables
# ------------------------------
# DV: the numeric Content.Type codes recoded to readable labels. Any value
#     other than 1, 2 or 3 (including NA) falls through to "Error".
# IV: Gender, copied unchanged.
# To adapt this section, change the source column names and the code/label
# pairs on the lines marked #Edit.
mydata <- mydata %>%
  mutate(DV = case_when( #Edit
    Content.Type == 1 ~ "Performance",
    Content.Type == 2 ~ "Personal",
    Content.Type == 3 ~ "Endorsement",
    TRUE ~ "Error"
  ))
mydata$IV <- mydata$Gender #Edit
# "Error" rows would otherwise be plotted, tabulated and tested as if they
# were a genuine category, so report them instead of letting them pass
# silently.
unmapped_rows <- sum(mydata$DV == "Error")
if (unmapped_rows > 0) {
  warning(
    unmapped_rows,
    " row(s) have a Content.Type other than 1, 2 or 3 and are labelled \"Error\"",
    call. = FALSE
  )
}
# ------------------------------
# Visualization: Stacked bar chart of IV by DV
# ------------------------------
# One bar per IV value; each bar is split into black-outlined segments, one
# per DV value, whose heights are row counts.
graph <- ggplot(data = mydata, mapping = aes(fill = DV, x = IV)) +
  geom_bar(color = "black") +
  scale_fill_brewer(palette = "Paired") +
  ggtitle("Content Type by Gender") +
  xlab("Independent Variable") +
  ylab("Count") +
  labs(fill = "Dependent Variable")
# Display the chart
graph

# ------------------------------
# Crosstabulation of DV by IV (DV = rows, IV = columns)
# ------------------------------
# Each cell holds the count and, in parentheses, that count as a percentage of
# its IV column total, so the percentages sum to 100 down each column.
crosstab <- mydata %>%
  count(DV, IV) %>%
  group_by(IV) %>%
  mutate(ColPct = 100 * n / sum(n)) %>%
  ungroup() %>%
  mutate(Cell = paste0(n, "\n(", round(ColPct, 1), "%)")) %>%
  select(DV, IV, Cell) %>%
  # count() omits DV/IV combinations that never occur; show those as zero
  # rather than leaving NA cells in the table.
  pivot_wider(names_from = IV, values_from = Cell, values_fill = "0\n(0%)")
# Format into gt table
crosstab_table <- crosstab %>%
  gt(rowname_col = "DV") %>%
  tab_header(
    title = "Content Type by Gender",
    subtitle = "Counts and (Column Percentages)"
  ) %>%
  # DV is the stub (row-label) column, so its heading is set with
  # tab_stubhead(); labelling it through cols_label() left the heading blank.
  tab_stubhead(label = "Dependent Variable")
# Show the polished crosstab table
crosstab_table
**Content Type by Gender**
*Counts and (Column Percentages)*

|  | F | M |
|---|---|---|
| Endorsement | 35 (47.9%) | 21 (16.7%) |
| Performance | 27 (37%) | 87 (69%) |
| Personal | 11 (15.1%) | 18 (14.3%) |
# ------------------------------
# Chi-squared test of independence
# ------------------------------
options(scipen = 999) # Prevents scientific notation
# chisq.test() cross-tabulates the two vectors and tests whether DV and IV are
# independent.
chitestresults <- chisq.test(mydata$DV, mydata$IV)
# ------------------------------
# Format Chi-squared test results into a table
# ------------------------------
# One-row summary: test statistic, degrees of freedom and p-value.
chitest_summary <- tibble(
  Test = "Chi-squared Test of Independence",
  Chi_sq = chitestresults$statistic,
  df = chitestresults$parameter,
  p = chitestresults$p.value
)
chitest_table <- chitest_summary %>%
  gt() %>%
  # Round χ² and p-value to 3 decimals, df to integer
  fmt_number(columns = c(Chi_sq, p), decimals = 3) %>%
  fmt_number(columns = df, decimals = 0) %>%
  # A p-value below 0.001 would round to "0.000", which reads as exactly zero;
  # display it as "<0.001" instead.
  sub_small_vals(columns = p, threshold = 0.001) %>%
  tab_header(
    title = "Chi-squared Test Results",
    subtitle = "Test of Independence between DV and IV"
  ) %>%
  cols_label(
    Test = "Test",
    Chi_sq = "Chi-squared Statistic",
    df = "Degrees of Freedom",
    p = "p-value"
  )
# Show the formatted results table
chitest_table
**Chi-squared Test Results**
*Test of Independence between DV and IV*

| Test | Chi-squared Statistic | Degrees of Freedom | p-value |
|---|---|---|---|
| Chi-squared Test of Independence | 24.383 | 2 | 0.000 |
Crosstab of collaboration by gender
Collaboration occurred in 79.4% of posts by males, compared to only
63% of posts by females. The difference was, again, significant,
according to a chi-square test (χ² = 5.52, df = 1, p = .019). Thus, posts
by males were significantly more likely to feature collaboration than
were posts by females.
################################################
# Collaboration by gender
################################################
# ------------------------------
# Define Dependent (DV) and Independent (IV) variables
# ------------------------------
# DV: the 0/1 Collaboration codes recoded to "No"/"Yes". Any other value
#     (including NA) falls through to "Error".
# IV: Gender, copied unchanged.
# To adapt this section, change the source column names and the code/label
# pairs on the lines marked #Edit.
mydata <- mydata %>%
  mutate(DV = case_when( #Edit
    Collaboration == 0 ~ "No",
    Collaboration == 1 ~ "Yes",
    TRUE ~ "Error"
  ))
mydata$IV <- mydata$Gender #Edit
# "Error" rows would otherwise be plotted, tabulated and tested as if they
# were a genuine category, so report them instead of letting them pass
# silently.
unmapped_rows <- sum(mydata$DV == "Error")
if (unmapped_rows > 0) {
  warning(
    unmapped_rows,
    " row(s) have a Collaboration value other than 0 or 1 and are labelled \"Error\"",
    call. = FALSE
  )
}
# ------------------------------
# Visualization: Stacked bar chart of IV by DV
# ------------------------------
# One bar per IV value; each bar is split into black-outlined segments, one
# per DV value, whose heights are row counts.
graph <- ggplot(data = mydata, mapping = aes(fill = DV, x = IV)) +
  geom_bar(color = "black") +
  scale_fill_brewer(palette = "Paired") +
  ggtitle("Collaboration by Gender") +
  xlab("Independent Variable") +
  ylab("Count") +
  labs(fill = "Dependent Variable")
# Display the chart
graph

# ------------------------------
# Crosstabulation of DV by IV (DV = rows, IV = columns)
# ------------------------------
# Each cell holds the count and, in parentheses, that count as a percentage of
# its IV column total, so the percentages sum to 100 down each column.
crosstab <- mydata %>%
  count(DV, IV) %>%
  group_by(IV) %>%
  mutate(ColPct = 100 * n / sum(n)) %>%
  ungroup() %>%
  mutate(Cell = paste0(n, "\n(", round(ColPct, 1), "%)")) %>%
  select(DV, IV, Cell) %>%
  # count() omits DV/IV combinations that never occur; show those as zero
  # rather than leaving NA cells in the table.
  pivot_wider(names_from = IV, values_from = Cell, values_fill = "0\n(0%)")
# Format into gt table
crosstab_table <- crosstab %>%
  gt(rowname_col = "DV") %>%
  tab_header(
    title = "Collaboration by Gender",
    subtitle = "Counts and (Column Percentages)"
  ) %>%
  # DV is the stub (row-label) column, so its heading is set with
  # tab_stubhead(); labelling it through cols_label() left the heading blank.
  tab_stubhead(label = "Dependent Variable")
# Show the polished crosstab table
crosstab_table
**Collaboration by Gender**
*Counts and (Column Percentages)*

|  | F | M |
|---|---|---|
| No | 27 (37%) | 26 (20.6%) |
| Yes | 46 (63%) | 100 (79.4%) |
# ------------------------------
# Chi-squared test of independence
# ------------------------------
options(scipen = 999) # Prevents scientific notation
# chisq.test() cross-tabulates the two vectors and tests whether DV and IV are
# independent. When both variables have exactly two levels (a 2x2 table), its
# default correct = TRUE applies Yates' continuity correction, so the
# statistic is slightly smaller than the uncorrected Pearson value.
chitestresults <- chisq.test(mydata$DV, mydata$IV)
# ------------------------------
# Format Chi-squared test results into a table
# ------------------------------
# One-row summary: test statistic, degrees of freedom and p-value.
chitest_summary <- tibble(
  Test = "Chi-squared Test of Independence",
  Chi_sq = chitestresults$statistic,
  df = chitestresults$parameter,
  p = chitestresults$p.value
)
chitest_table <- chitest_summary %>%
  gt() %>%
  # Round χ² and p-value to 3 decimals, df to integer
  fmt_number(columns = c(Chi_sq, p), decimals = 3) %>%
  fmt_number(columns = df, decimals = 0) %>%
  # A p-value below 0.001 would round to "0.000", which reads as exactly zero;
  # display it as "<0.001" instead.
  sub_small_vals(columns = p, threshold = 0.001) %>%
  tab_header(
    title = "Chi-squared Test Results",
    subtitle = "Test of Independence between DV and IV"
  ) %>%
  cols_label(
    Test = "Test",
    Chi_sq = "Chi-squared Statistic",
    df = "Degrees of Freedom",
    p = "p-value"
  )
# Show the formatted results table
chitest_table
**Chi-squared Test Results**
*Test of Independence between DV and IV*

| Test | Chi-squared Statistic | Degrees of Freedom | p-value |
|---|---|---|---|
| Chi-squared Test of Independence | 5.515 | 1 | 0.019 |