library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:purrr':
##
## some
##
## The following object is masked from 'package:dplyr':
##
## recode
library(entropy)
library(nnet)
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(tidyr)
library(forcats)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
Import the dataset, then check the percentage of female and male characters and the split of sexes within each species
# Load the coded champion dataset from an Excel workbook.
# NOTE(review): this is a hard-coded absolute Windows path, so the script only
# runs on a machine with this exact folder layout — consider a relative path.
DF <- read_excel("D://LOLANALISYS/ANALISI.xlsx")
## New names:
## • `` -> `...13`
# Number of champions per sex and each sex's share of the whole dataset.
DF %>%
  count(SEX, name = "count") %>%
  mutate(percentage = count / sum(count) * 100)
## # A tibble: 2 × 3
## SEX count percentage
## <dbl> <int> <dbl>
## 1 0 103 61.7
## 2 1 64 38.3
# Count champions per species and sex. summarise() drops only the last grouping
# level (SEX), so the result stays grouped by SPECIES and the percentage below
# is each sex's share within its own species.
DF %>%
group_by(SPECIES, SEX) %>%
summarise(count = n()) %>%
mutate(percentage = (count / sum(count)) * 100)
## `summarise()` has grouped output by 'SPECIES'. You can override using the
## `.groups` argument.
## # A tibble: 8 × 4
## # Groups: SPECIES [4]
## SPECIES SEX count percentage
## <chr> <dbl> <int> <dbl>
## 1 Beast 0 13 59.1
## 2 Beast 1 9 40.9
## 3 Human 0 52 52
## 4 Human 1 48 48
## 5 Monster 0 28 90.3
## 6 Monster 1 3 9.68
## 7 Yordle 0 10 71.4
## 8 Yordle 1 4 28.6
There are more male champions than female champions
I excluded the characters that were half or fully masked, together with the Monster species
# Drop champions whose MASKED code is 1 or 2, and every champion of the
# "Monster" species, in a single filtering step.
DF <- DF %>%
  filter(
    !(MASKED %in% c(1, 2)),
    !(SPECIES == "Monster")
  )
Recomputed the percentage of female and male characters and the split of sexes within each species
# Sex counts and shares after the exclusions above.
DF %>%
  count(SEX, name = "count") %>%
  mutate(percentage = count / sum(count) * 100)
## # A tibble: 2 × 3
## SEX count percentage
## <dbl> <int> <dbl>
## 1 0 57 50
## 2 1 57 50
# Per-species sex split after the exclusions. The result stays grouped by
# SPECIES after summarise(), so percentages are computed within each species.
DF%>%
group_by(SPECIES, SEX) %>%
summarise(count = n()) %>%
mutate(percentage = (count / sum(count)) * 100)
## `summarise()` has grouped output by 'SPECIES'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 4
## # Groups: SPECIES [3]
## SPECIES SEX count percentage
## <chr> <dbl> <int> <dbl>
## 1 Beast 0 12 57.1
## 2 Beast 1 9 42.9
## 3 Human 0 40 47.6
## 4 Human 1 44 52.4
## 5 Yordle 0 5 55.6
## 6 Yordle 1 4 44.4
Same amount of female and males
To analyse the emotions, I filtered out the champions whose emotion coding was too ambiguous to be considered final (for reliability), and then repeated the general analysis
# Keep only the champions that have an emotion code.
DATA <- DF %>%
  filter(!is.na(EMOTIONS))
# Sex counts and shares among the emotion-coded champions.
DATA %>%
  count(SEX, name = "count") %>%
  mutate(percentage = count / sum(count) * 100)
## # A tibble: 2 × 3
## SEX count percentage
## <dbl> <int> <dbl>
## 1 0 50 49.0
## 2 1 52 51.0
# Per-species sex split among the emotion-coded champions. The result stays
# grouped by SPECIES after summarise(), so percentages are within each species.
DATA %>%
group_by(SPECIES, SEX) %>%
summarise(count = n()) %>%
mutate(percentage = (count / sum(count)) * 100)
## `summarise()` has grouped output by 'SPECIES'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 4
## # Groups: SPECIES [3]
## SPECIES SEX count percentage
## <chr> <dbl> <int> <dbl>
## 1 Beast 0 10 55.6
## 2 Beast 1 8 44.4
## 3 Human 0 35 46.7
## 4 Human 1 40 53.3
## 5 Yordle 0 5 55.6
## 6 Yordle 1 4 44.4
There are slightly more females than males
General look at emotion / sex
# Valid emotion codes.
emotion_values <- 0:9
# Wide table: one row per emotion code, one count column per sex
# (combinations that never occur are filled with 0).
DATA %>%
  filter(EMOTIONS %in% emotion_values) %>%
  count(EMOTIONS, SEX, name = "count") %>%
  pivot_wider(names_from = SEX, values_from = count, values_fill = 0)
## # A tibble: 9 × 3
## EMOTIONS `0` `1`
## <dbl> <int> <int>
## 1 0 11 20
## 2 1 18 5
## 3 2 7 7
## 4 3 0 1
## 5 4 0 1
## 6 5 1 0
## 7 7 8 4
## 8 8 0 2
## 9 9 5 12
General look at emotions per species and sex
# Valid emotion codes.
emotion_values <- 0:9
# Wide table: one row per emotion code, one count column per sex/species
# combination (missing combinations are filled with 0).
DATA %>%
  filter(EMOTIONS %in% emotion_values) %>%
  count(EMOTIONS, SEX, SPECIES, name = "count") %>%
  pivot_wider(
    names_from = c(SEX, SPECIES),
    values_from = count,
    values_fill = 0
  )
## # A tibble: 9 × 7
## EMOTIONS `0_Beast` `0_Human` `0_Yordle` `1_Beast` `1_Human` `1_Yordle`
## <dbl> <int> <int> <int> <int> <int> <int>
## 1 0 1 9 1 3 16 1
## 2 1 6 11 1 0 5 0
## 3 2 1 4 2 0 5 2
## 4 3 0 0 0 1 0 0
## 5 4 0 0 0 0 0 1
## 6 5 0 1 0 0 0 0
## 7 7 1 6 1 2 2 0
## 8 8 0 0 0 0 2 0
## 9 9 1 4 0 2 10 0
Percentages
# For every emotion code, the share of its occurrences contributed by each sex
# (each row of the wide table sums to 100).
DATA %>%
  filter(EMOTIONS %in% emotion_values) %>%
  count(EMOTIONS, SEX, name = "count") %>%
  group_by(EMOTIONS) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup() %>%
  select(EMOTIONS, SEX, percentage) %>%
  pivot_wider(
    names_from = SEX,
    values_from = percentage,
    values_fill = list(percentage = 0)
  )
## # A tibble: 9 × 3
## EMOTIONS `0` `1`
## <dbl> <dbl> <dbl>
## 1 0 35.5 64.5
## 2 1 78.3 21.7
## 3 2 50 50
## 4 3 0 100
## 5 4 0 100
## 6 5 100 0
## 7 7 66.7 33.3
## 8 8 0 100
## 9 9 29.4 70.6
# For every emotion code, the share of its occurrences contributed by each
# sex/species combination (each row of the wide table sums to 100).
DATA %>%
  filter(EMOTIONS %in% emotion_values) %>%
  count(EMOTIONS, SEX, SPECIES, name = "count") %>%
  group_by(EMOTIONS) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup() %>%
  select(EMOTIONS, SEX, SPECIES, percentage) %>%
  pivot_wider(
    names_from = c(SEX, SPECIES),
    values_from = percentage,
    values_fill = list(percentage = 0)
  )
## # A tibble: 9 × 7
## EMOTIONS `0_Beast` `0_Human` `0_Yordle` `1_Beast` `1_Human` `1_Yordle`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 3.23 29.0 3.23 9.68 51.6 3.23
## 2 1 26.1 47.8 4.35 0 21.7 0
## 3 2 7.14 28.6 14.3 0 35.7 14.3
## 4 3 0 0 0 100 0 0
## 5 4 0 0 0 0 0 100
## 6 5 0 100 0 0 0 0
## 7 7 8.33 50 8.33 16.7 16.7 0
## 8 8 0 0 0 0 100 0
## 9 9 5.88 23.5 0 11.8 58.8 0
Visualization of all previous analysis
# Display names for the emotion codes 0-9, in code order.
emotion_labels <- c("Neutral", "Anger", "Happiness", "Surprise", "Disgust",
"Sadness", "Fear", "Content", "Alluring", "Determined")
# Count champions per emotion and sex, with the emotion code turned into a
# labelled factor.
emotion_counts <- DATA %>%
mutate(EMOTIONS = factor(EMOTIONS, levels = 0:9, labels = emotion_labels)) %>%
group_by(EMOTIONS, Sex = factor(SEX)) %>%
# No .groups argument is given, so the result stays grouped by EMOTIONS.
summarise(count = n())
## `summarise()` has grouped output by 'EMOTIONS'. You can override using the
## `.groups` argument.
# Recode sex for display: 0 becomes "Male", any other value "Female".
emotion_counts$Sex <- ifelse(emotion_counts$Sex == 0, "Male", "Female")
# Plotting: dodged bars of the emotion counts, one bar per sex.
# Colours are keyed by sex name rather than by position, so colour and legend
# text cannot drift out of sync with the data if a level is missing or the
# level order changes.
ggplot(emotion_counts, aes(x = EMOTIONS, y = count, fill = Sex)) +
  geom_col(position = "dodge") +
  labs(x = "Emotions", y = "Count") +
  scale_fill_manual(
    values = c(Female = "tan1", Male = "cyan4"),
    name = "Sex"
  ) +
  theme_classic()
# Species to plot, one figure each.
species_labels <- c("Human", "Yordle", "Beast")
# Build one dodged bar chart of emotion counts by sex for every species.
# Returns a list of ggplot objects in the order of species_labels.
plots <- lapply(species_labels, function(species) {
  species_counts <- DATA %>%
    filter(SPECIES == species) %>%
    mutate(
      EMOTIONS = factor(EMOTIONS, levels = 0:9, labels = emotion_labels),
      Sex = ifelse(SEX == 0, "Male", "Female")
    ) %>%
    count(EMOTIONS, Sex, name = "count")
  # Colours are keyed by sex name: with positional colours and labels, a
  # species that contained only one sex would be drawn and labelled as the
  # other one.
  ggplot(species_counts, aes(x = EMOTIONS, y = count, fill = Sex)) +
    geom_col(position = "dodge") +
    labs(
      x = "Emotions", y = "Count",
      title = paste("Distribution of Emotions by Sex -", species)
    ) +
    scale_fill_manual(
      values = c(Female = "tan1", Male = "cyan4"),
      name = "Sex"
    ) +
    theme_classic()
})
plots
## [[1]]
##
## [[2]]
##
## [[3]]
Checked the CHI square
# Cross-tabulate sex (rows) against emotion code (columns).
contingency_table <- table(DATA$SEX, DATA$EMOTIONS)
# Pearson chi-squared test of independence on that table.
# NOTE(review): several emotion codes have only 0-2 observations per sex, which
# presumably triggers the "approximation may be incorrect" warning.
chi_square_test <- chisq.test(contingency_table)
## Warning in chisq.test(contingency_table): Chi-squared approximation may be
## incorrect
print(chi_square_test)
##
## Pearson's Chi-squared test
##
## data: contingency_table
## X-squared = 19.145, df = 8, p-value = 0.01411
The warning is probably due to low frequencies in some cells of the contingency table. The test indicates an effect of sex on emotions (p < 0.05)
Correction of CHI square
# Fisher's exact test on the same sex-by-emotion table, used here as a check
# on the chi-squared result.
fisher.test(contingency_table)
##
## Fisher's Exact Test for Count Data
##
## data: contingency_table
## p-value = 0.00479
## alternative hypothesis: two.sided
Still significant
Calculation of entropy to check the diversification of emotions between the sexes: who showed a wider range of emotions, males or females?
# Entropy of a vector (or table) of category counts.
# The counts are first turned into proportions and then handed to entropy()
# from the entropy package, called with its default settings.
calculate_entropy <- function(counts) {
  entropy(counts / sum(counts))
}
# Emotion frequency tables for males (SEX == 0) and females (SEX == 1).
MDATA <- with(DATA, table(EMOTIONS[SEX == 0]))
FDATA <- with(DATA, table(EMOTIONS[SEX == 1]))
# Entropy of each sex's emotion distribution.
ENTROPY_M <- calculate_entropy(MDATA)
ENTROPY_F <- calculate_entropy(FDATA)
print(ENTROPY_M)
## [1] 1.57787
print(ENTROPY_F)
## [1] 1.675598
Females had more diversification in emotional expressiveness
0 is the neutral facial expression meaning lack of emotion
# Subset without the neutral expression (emotion code 0).
DATAF <- DATA %>%
  filter(EMOTIONS != 0)
Visualization of diversification of emotion excluding neutral
# Display names for emotion codes 1-9 (code 0, neutral, is not in DATAF).
f_emotion_labels <- c(
  "Anger", "Happiness", "Surprise", "Disgust",
  "Sadness", "Fear", "Content", "Alluring", "Determined"
)
# Count champions per emotion and sex. The factor is built from codes 1-9 and
# f_emotion_labels, which was previously defined but never used, so the factor
# no longer carries an empty "Neutral" level.
f_emotion_counts <- DATAF %>%
  mutate(
    EMOTIONS = factor(EMOTIONS, levels = 1:9, labels = f_emotion_labels),
    Sex = factor(SEX, levels = 0:1, labels = c("Male", "Female"))
  ) %>%
  group_by(EMOTIONS, Sex) %>%
  summarise(count = n(), .groups = "drop")
# Dodged bars of emotion counts by sex; colours are keyed by sex name.
ggplot(f_emotion_counts, aes(x = EMOTIONS, y = count, fill = Sex)) +
  geom_col(position = "dodge") +
  labs(x = "Emotions", y = "Count", title = "Distribution of Emotions by Sex") +
  scale_fill_manual(
    values = c(Male = "cyan4", Female = "tan1"),
    name = "Sex"
  ) +
  theme_classic()
CHI square with the exclusion of the neutral facial expression
# Sex (rows) by emotion code (columns) for the subset without neutral.
TABLE_DATAF<-table(DATAF$SEX, DATAF$EMOTIONS)
# Pearson chi-squared test of independence on that table.
chisq.test(TABLE_DATAF)
## Warning in chisq.test(TABLE_DATAF): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: TABLE_DATAF
## X-squared = 16.029, df = 7, p-value = 0.02485
Correction
# Fisher's exact test on the same table (neutral excluded), used as a check on
# the chi-squared result.
fisher.test(TABLE_DATAF)
##
## Fisher's Exact Test for Count Data
##
## data: TABLE_DATAF
## p-value = 0.008591
## alternative hypothesis: two.sided
There is an effect of Sex on Emotions
Entropy with exclusion of the Neutral facial expression
# Emotion frequency tables per sex, neutral excluded (0 = male, 1 = female).
M_DATAF <- with(DATAF, table(EMOTIONS[SEX == 0]))
F_DATAF <- with(DATAF, table(EMOTIONS[SEX == 1]))
# Entropy of each sex's emotion distribution.
ENTROPY_M_F <- calculate_entropy(M_DATAF)
ENTROPY_F_F <- calculate_entropy(F_DATAF)
print(ENTROPY_M_F)
## [1] 1.347388
print(ENTROPY_F_F)
## [1] 1.640145
Females have more diversification in expressiveness
Multinomial models
NO 0
# Multinomial logit of the emotion code on sex, neutral (code 0) excluded.
# NOTE(review): some emotion codes occur in only one sex in this subset, which
# presumably explains the very large coefficients in the summary — interpret
# those rows with caution.
model<- multinom(EMOTIONS ~ SEX, data = DATAF)
## # weights: 24 (14 variable)
## initial value 147.640349
## iter 10 value 105.940033
## iter 20 value 105.047337
## iter 30 value 105.032791
## iter 30 value 105.032791
## final value 105.032791
## converged
summary(model)
## Call:
## multinom(formula = EMOTIONS ~ SEX, data = DATAF)
##
## Coefficients:
## (Intercept) SEX
## 2 -0.9444545 1.2808807
## 3 -49.7996998 48.1902382
## 4 -49.7996998 48.1902382
## 5 -2.8903238 -8.9900227
## 7 -0.8109446 0.5877524
## 8 -42.5926746 41.6762994
## 9 -1.2809084 2.1563309
##
## Std. Errors:
## (Intercept) SEX
## 2 0.4454349 0.7357055
## 3 0.5477172 0.5477172
## 4 0.5477172 0.5477172
## 5 1.0273804 169.9418704
## 7 0.4249210 0.7940733
## 8 0.4183344 0.4183344
## 9 0.5055207 0.7340826
##
## Residual Deviance: 210.0656
## AIC: 238.0656
WITH 0
# Multinomial logit of the emotion code on sex, neutral (code 0) included.
# NOTE(review): emotion codes observed in only one sex presumably cause the
# extreme coefficients and the near-zero standard error in the summary —
# interpret those rows with caution.
model1<- multinom(EMOTIONS ~ SEX, data = DATA)
## # weights: 27 (16 variable)
## initial value 224.116907
## iter 10 value 167.645945
## iter 20 value 166.033807
## iter 30 value 166.024643
## final value 166.024635
## converged
summary(model1)
## Call:
## multinom(formula = EMOTIONS ~ SEX, data = DATA)
##
## Coefficients:
## (Intercept) SEX
## 1 0.4924964 -1.8787892
## 2 -0.4519629 -0.5978621
## 3 -16.9508633 13.9551360
## 4 -16.9508633 13.9551360
## 5 -2.3978173 -16.6568526
## 7 -0.3184110 -1.2910066
## 8 -17.8515513 15.5489481
## 9 -0.7884348 0.2776129
##
## Std. Errors:
## (Intercept) SEX
## 1 0.3827095 6.296560e-01
## 2 0.4834956 6.531660e-01
## 3 0.5123468 5.123466e-01
## 4 0.5123468 5.123466e-01
## 5 1.0444395 1.351889e-07
## 7 0.4646593 7.182644e-01
## 8 0.3708133 3.708133e-01
## 9 0.5393614 6.513401e-01
##
## Residual Deviance: 332.0493
## AIC: 364.0493
WITH 0
# Linear model of the emotion code on sex, neutral included.
# NOTE(review): EMOTIONS enters lm() as a number although the codes 0-9 label
# distinct emotion categories (see emotion_labels), so the SEX slope compares
# mean code values rather than emotion categories — confirm this is intended.
model2<- lm(EMOTIONS ~ SEX, data = DATA)
summary(model2)
##
## Call:
## lm(formula = EMOTIONS ~ SEX, data = DATA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.423 -2.760 -1.760 4.240 6.240
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.7600 0.4963 5.561 2.24e-07 ***
## SEX 0.6631 0.6952 0.954 0.342
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.51 on 100 degrees of freedom
## Multiple R-squared: 0.009016, Adjusted R-squared: -0.0008936
## F-statistic: 0.9098 on 1 and 100 DF, p-value: 0.3425
NO 0
# Linear model of the emotion code on sex, neutral (code 0) excluded.
# NOTE(review): the emotion codes are category labels but are modelled as a
# numeric outcome here — confirm this is intended.
model3<- lm(EMOTIONS ~ SEX, data = DATAF)
summary(model3)
##
## Call:
## lm(formula = EMOTIONS ~ SEX, data = DATAF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.562 -2.538 -1.538 3.438 5.462
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.5385 0.5257 6.731 4.06e-09 ***
## SEX 2.0240 0.7830 2.585 0.0119 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.283 on 69 degrees of freedom
## Multiple R-squared: 0.08829, Adjusted R-squared: 0.07508
## F-statistic: 6.682 on 1 and 69 DF, p-value: 0.01185
When the neutral expression is excluded from the analysis there is a significant effect of SEX on Emotions
# Linear model of the emotion code on sex, ethical alignment, moral alignment
# and all their interactions, neutral included.
# NOTE(review): EMOTIONS, ETHICAL and MORAL all enter as numbers (one slope
# each in the summary) although they are category codes — confirm intent.
model4 <- lm(EMOTIONS ~ SEX * ETHICAL * MORAL, data = DATA)
summary(model4)
##
## Call:
## lm(formula = EMOTIONS ~ SEX * ETHICAL * MORAL, data = DATA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.140 -2.447 -1.160 2.860 7.956
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.42659 1.64760 0.866 0.3888
## SEX 5.22546 2.92633 1.786 0.0774 .
## ETHICAL 0.65518 0.95082 0.689 0.4925
## MORAL 0.04851 2.09171 0.023 0.9815
## SEX:ETHICAL -2.81255 1.65520 -1.699 0.0926 .
## SEX:MORAL -5.65617 3.28759 -1.720 0.0886 .
## ETHICAL:MORAL 0.35393 1.18717 0.298 0.7663
## SEX:ETHICAL:MORAL 3.40046 1.80996 1.879 0.0634 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.352 on 94 degrees of freedom
## Multiple R-squared: 0.1504, Adjusted R-squared: 0.08717
## F-statistic: 2.378 on 7 and 94 DF, p-value: 0.02776
# Same three-way interaction model, fitted without the neutral expression.
# NOTE(review): EMOTIONS, ETHICAL and MORAL all enter as numbers although they
# are category codes — confirm intent.
model5 <- lm(EMOTIONS ~ SEX * ETHICAL * MORAL, data = DATAF)
summary(model5)
##
## Call:
## lm(formula = EMOTIONS ~ SEX * ETHICAL * MORAL, data = DATAF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.810 -2.515 -1.230 2.768 6.171
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.4283 1.9069 1.273 0.2075
## SEX 7.9366 3.3128 2.396 0.0196 *
## ETHICAL 0.4011 1.0836 0.370 0.7125
## MORAL 0.5661 2.3999 0.236 0.8143
## SEX:ETHICAL -3.3227 1.8621 -1.784 0.0792 .
## SEX:MORAL -6.9802 3.7356 -1.869 0.0663 .
## ETHICAL:MORAL 0.1199 1.3508 0.089 0.9295
## SEX:ETHICAL:MORAL 3.6591 2.0377 1.796 0.0773 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.238 on 63 degrees of freedom
## Multiple R-squared: 0.1899, Adjusted R-squared: 0.09994
## F-statistic: 2.11 on 7 and 63 DF, p-value: 0.05518
There are tendencies (non-significant trends) for an effect of the two alignments on emotion, also when SEX is taken into account. This suggests that personality and sex tend to be related to emotional expression, and that this relation differs between the sexes
# Estimated marginal means from model5.
# ETHICAL and MORAL are numeric in the model, so the output holds them at a
# single value each and reports only one mean per sex, not one per alignment.
emmeans(model5,~ SEX * ETHICAL * MORAL)
## SEX ETHICAL MORAL emmean SE df lower.CL upper.CL
## 0 1.56 0.817 3.67 0.535 63 2.60 4.74
## 1 1.56 0.817 5.38 0.596 63 4.19 6.57
##
## Confidence level used: 0.95
Visualization of the effect of personality divided by sex
# Display names for the alignment codes 0-2 and the emotion codes 0-9.
ethical_labels <- c("Neutral", "Lawful", "Chaotic")
moral_labels <- c("Neutral", "Good", "Evil")
emotion_labels <- c(
  "Neutral", "Anger", "Happiness", "Surprise", "Disgust",
  "Sadness", "Fear", "Content", "Alluring", "Determined"
)
# Labelled copy of DATA: alignments as factors, sex as "Male"/"Female".
data_preprocessed <- DATA %>%
  mutate(
    ETHICAL = factor(ETHICAL, levels = 0:2, labels = ethical_labels),
    MORAL = factor(MORAL, levels = 0:2, labels = moral_labels),
    Sex = ifelse(SEX == 0, "Male", "Female")
  )
# Champion counts per alignment and sex.
ethical_summary <- data_preprocessed %>%
  count(ETHICAL, Sex, name = "count")
moral_summary <- data_preprocessed %>%
  count(MORAL, Sex, name = "count")
# Colours are keyed by sex name rather than by position, so colour and legend
# text always agree with the data.
ethical_plot <- ggplot(ethical_summary, aes(x = ETHICAL, y = count, fill = Sex)) +
  geom_col(position = "dodge") +
  labs(x = "Ethical Alignment", y = "Count") +
  scale_fill_manual(
    values = c(Female = "tan1", Male = "cyan4"),
    name = "Sex"
  ) +
  theme_classic()
moral_plot <- ggplot(moral_summary, aes(x = MORAL, y = count, fill = Sex)) +
  geom_col(position = "dodge") +
  labs(x = "Moral Alignment", y = "Count") +
  scale_fill_manual(
    values = c(Female = "tan1", Male = "cyan4"),
    name = "Sex"
  ) +
  theme_classic()
# Show both charts side by side.
grid.arrange(ethical_plot, moral_plot, ncol = 2)
A different type of visualization (less colour-blind friendly)
# Labelled copy of DATA: alignments as factors, sex as "Male"/"Female".
data_preprocessed <- DATA %>%
  mutate(
    ETHICAL = factor(ETHICAL, levels = 0:2, labels = ethical_labels),
    MORAL = factor(MORAL, levels = 0:2, labels = moral_labels),
    Sex = ifelse(SEX == 0, "Male", "Female")
  )
# Champion counts per sex and alignment.
moral_summary <- data_preprocessed %>%
  count(Sex, MORAL, name = "count")
ethical_summary <- data_preprocessed %>%
  count(Sex, ETHICAL, name = "count")
# The factor levels are ordered Neutral/Good/Evil and Neutral/Lawful/Chaotic.
# Positional `labels` in a different order relabel the legend without moving
# the data, which swapped the "Neutral" entry with "Good" and with "Lawful".
# Colours are therefore keyed by level name, and `breaks` only sets the order
# in which the legend entries appear.
moral_plot <- ggplot(moral_summary, aes(x = Sex, y = count, fill = MORAL)) +
  geom_col(position = "dodge") +
  labs(x = "Sex", y = "Count", title = "Distribution of Moral Alignments by Sex") +
  scale_fill_manual(
    values = c(Good = "lightgreen", Neutral = "gray", Evil = "orange"),
    breaks = c("Good", "Neutral", "Evil"),
    name = "Moral Alignment"
  ) +
  theme_classic()
ethical_plot <- ggplot(ethical_summary, aes(x = Sex, y = count, fill = ETHICAL)) +
  geom_col(position = "dodge") +
  labs(x = "Sex", y = "Count", title = "Distribution of Ethical Alignments by Sex") +
  scale_fill_manual(
    values = c(Lawful = "lightgreen", Neutral = "gray", Chaotic = "orange"),
    breaks = c("Lawful", "Neutral", "Chaotic"),
    name = "Ethical Alignment"
  ) +
  theme_classic()
# Show both charts side by side.
grid.arrange(moral_plot, ethical_plot, ncol = 2)
Tables showing diversification of emotions and personality
# Labelled copy of DATA: alignments as factors, sex as "Male"/"Female".
data_preprocessed <- DATA %>%
  mutate(
    ETHICAL = factor(ETHICAL, levels = 0:2, labels = ethical_labels),
    MORAL = factor(MORAL, levels = 0:2, labels = moral_labels),
    Sex = ifelse(SEX == 0, "Male", "Female")
  )
# Moral alignment counts per sex, with each count's share within that sex.
data_preprocessed %>%
  count(Sex, MORAL, name = "count") %>%
  group_by(Sex) %>%
  mutate(percentage = count / sum(count) * 100)
## # A tibble: 6 × 4
## # Groups: Sex [2]
## Sex MORAL count percentage
## <chr> <fct> <int> <dbl>
## 1 Female Neutral 16 30.8
## 2 Female Good 25 48.1
## 3 Female Evil 11 21.2
## 4 Male Neutral 24 48
## 5 Male Good 20 40
## 6 Male Evil 6 12
# Ethical alignment counts per sex, with each count's share within that sex.
data_preprocessed %>%
  count(Sex, ETHICAL, name = "count") %>%
  group_by(Sex) %>%
  mutate(percentage = count / sum(count) * 100)
## # A tibble: 6 × 4
## # Groups: Sex [2]
## Sex ETHICAL count percentage
## <chr> <fct> <int> <dbl>
## 1 Female Neutral 6 11.5
## 2 Female Lawful 18 34.6
## 3 Female Chaotic 28 53.8
## 4 Male Neutral 5 10
## 5 Male Lawful 16 32
## 6 Male Chaotic 29 58
# Labelled copy of DATA, this time with the emotion code as a factor too.
data_preprocessed <- DATA %>%
  mutate(
    ETHICAL = factor(ETHICAL, levels = 0:2, labels = ethical_labels),
    MORAL = factor(MORAL, levels = 0:2, labels = moral_labels),
    EMOTIONS = factor(EMOTIONS, levels = 0:9, labels = emotion_labels),
    Sex = ifelse(SEX == 0, "Male", "Female")
  )
# Emotion counts per sex, with each count's share within that sex, sorted by
# sex and then by emotion.
data_preprocessed %>%
  count(Sex, EMOTIONS, name = "count") %>%
  group_by(Sex) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup() %>%
  arrange(Sex, EMOTIONS)
## # A tibble: 14 × 4
## Sex EMOTIONS count percentage
## <chr> <fct> <int> <dbl>
## 1 Female Neutral 20 38.5
## 2 Female Anger 5 9.62
## 3 Female Happiness 7 13.5
## 4 Female Surprise 1 1.92
## 5 Female Disgust 1 1.92
## 6 Female Content 4 7.69
## 7 Female Alluring 2 3.85
## 8 Female Determined 12 23.1
## 9 Male Neutral 11 22
## 10 Male Anger 18 36
## 11 Male Happiness 7 14
## 12 Male Sadness 1 2
## 13 Male Content 8 16
## 14 Male Determined 5 10