LIBRARIES

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(car) 
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
library(entropy)
library(nnet)
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
library(tidyr)
library(forcats)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine

DATASET AND GENERAL ANALYSES

Import the dataset, then check the percentage of female and male characters and the split of sexes within each species

# Read the coded dataset from the Excel workbook.
# NOTE(review): the path is absolute and machine-specific, and the import
# message printed below shows an unnamed column being renamed to `...13`.
DF <- read_excel("D://LOLANALISYS/ANALISI.xlsx")
## New names:
## • `` -> `...13`
# Number and percentage of characters per SEX code in the full dataset.
DF %>%
  count(SEX, name = "count") %>%
  mutate(percentage = (count / sum(count)) * 100)
## # A tibble: 2 × 3
##     SEX count percentage
##   <dbl> <int>      <dbl>
## 1     0   103       61.7
## 2     1    64       38.3
# Number and percentage of each SEX code within every SPECIES.
# `.groups = "drop_last"` states explicitly that the result stays grouped by
# SPECIES, so sum(count) below is the per-species total; it also avoids the
# dplyr message about implicitly retained grouping.
DF %>%
  group_by(SPECIES, SEX) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percentage = (count / sum(count)) * 100)
## # A tibble: 8 × 4
## # Groups:   SPECIES [4]
##   SPECIES   SEX count percentage
##   <chr>   <dbl> <int>      <dbl>
## 1 Beast       0    13      59.1 
## 2 Beast       1     9      40.9 
## 3 Human       0    52      52   
## 4 Human       1    48      48   
## 5 Monster     0    28      90.3 
## 6 Monster     1     3       9.68
## 7 Yordle      0    10      71.4 
## 8 Yordle      1     4      28.6

More male champions than female ones

I excluded the characters that were half or fully masked, together with the species Monster

# Drop rows whose MASKED code is 1 or 2, and every row of species "Monster".
DF <- DF %>%
  filter(
    !MASKED %in% c(1, 2),
    SPECIES != "Monster"
  )

Recomputed the percentage of female and male characters and the split of sexes within each species

# Number and percentage of characters per SEX code after the exclusions.
DF %>%
  count(SEX, name = "count") %>%
  mutate(percentage = (count / sum(count)) * 100)
## # A tibble: 2 × 3
##     SEX count percentage
##   <dbl> <int>      <dbl>
## 1     0    57         50
## 2     1    57         50
# Number and percentage of each SEX code within every remaining SPECIES.
# `.groups = "drop_last"` states explicitly that the result stays grouped by
# SPECIES, so sum(count) below is the per-species total; it also avoids the
# dplyr message about implicitly retained grouping.
DF %>%
  group_by(SPECIES, SEX) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percentage = (count / sum(count)) * 100)
## # A tibble: 6 × 4
## # Groups:   SPECIES [3]
##   SPECIES   SEX count percentage
##   <chr>   <dbl> <int>      <dbl>
## 1 Beast       0    12       57.1
## 2 Beast       1     9       42.9
## 3 Human       0    40       47.6
## 4 Human       1    44       52.4
## 5 Yordle      0     5       55.6
## 6 Yordle      1     4       44.4

Same number of females and males

In order to check the emotions, I filtered out the champions whose coding was too ambiguous and not final for reliability, and then repeated the general analysis

# Keep only the rows that have an emotion code.
DATA <- filter(DF, !is.na(EMOTIONS))

# Number and percentage of characters per SEX code among the coded rows.
DATA %>%
  count(SEX, name = "count") %>%
  mutate(percentage = (count / sum(count)) * 100)
## # A tibble: 2 × 3
##     SEX count percentage
##   <dbl> <int>      <dbl>
## 1     0    50       49.0
## 2     1    52       51.0
# Number and percentage of each SEX code within every SPECIES (coded rows only).
# `.groups = "drop_last"` states explicitly that the result stays grouped by
# SPECIES, so sum(count) below is the per-species total; it also avoids the
# dplyr message about implicitly retained grouping.
DATA %>%
  group_by(SPECIES, SEX) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percentage = (count / sum(count)) * 100)
## # A tibble: 6 × 4
## # Groups:   SPECIES [3]
##   SPECIES   SEX count percentage
##   <chr>   <dbl> <int>      <dbl>
## 1 Beast       0    10       55.6
## 2 Beast       1     8       44.4
## 3 Human       0    35       46.7
## 4 Human       1    40       53.3
## 5 Yordle      0     5       55.6
## 6 Yordle      1     4       44.4

Slightly more females than males

General look at emotion / sex

emotion_values <- 0:9

# Emotion code (rows) by SEX code (columns); combinations that never occur
# are filled with 0.
DATA %>%
  filter(EMOTIONS %in% emotion_values) %>%
  count(EMOTIONS, SEX, name = "count") %>%
  pivot_wider(names_from = SEX, values_from = count, values_fill = 0)
## # A tibble: 9 × 3
##   EMOTIONS   `0`   `1`
##      <dbl> <int> <int>
## 1        0    11    20
## 2        1    18     5
## 3        2     7     7
## 4        3     0     1
## 5        4     0     1
## 6        5     1     0
## 7        7     8     4
## 8        8     0     2
## 9        9     5    12

General look at emotions per species and sex

emotion_values <- 0:9

# Emotion code (rows) by SEX x SPECIES (columns); combinations that never
# occur are filled with 0.
DATA %>%
  filter(EMOTIONS %in% emotion_values) %>%
  count(EMOTIONS, SEX, SPECIES, name = "count") %>%
  pivot_wider(
    names_from = c(SEX, SPECIES),
    values_from = count,
    values_fill = 0
  )
## # A tibble: 9 × 7
##   EMOTIONS `0_Beast` `0_Human` `0_Yordle` `1_Beast` `1_Human` `1_Yordle`
##      <dbl>     <int>     <int>      <int>     <int>     <int>      <int>
## 1        0         1         9          1         3        16          1
## 2        1         6        11          1         0         5          0
## 3        2         1         4          2         0         5          2
## 4        3         0         0          0         1         0          0
## 5        4         0         0          0         0         0          1
## 6        5         0         1          0         0         0          0
## 7        7         1         6          1         2         2          0
## 8        8         0         0          0         0         2          0
## 9        9         1         4          0         2        10          0

Percentages

# For every emotion code, the percentage of its characters per SEX code
# (each row of the wide table sums to 100).
DATA %>%
  filter(EMOTIONS %in% emotion_values) %>%
  count(EMOTIONS, SEX, name = "count") %>%
  group_by(EMOTIONS) %>%
  mutate(percentage = (count / sum(count)) * 100) %>%
  ungroup() %>%
  select(EMOTIONS, SEX, percentage) %>%
  pivot_wider(
    names_from = SEX,
    values_from = percentage,
    values_fill = 0
  )
## # A tibble: 9 × 3
##   EMOTIONS   `0`   `1`
##      <dbl> <dbl> <dbl>
## 1        0  35.5  64.5
## 2        1  78.3  21.7
## 3        2  50    50  
## 4        3   0   100  
## 5        4   0   100  
## 6        5 100     0  
## 7        7  66.7  33.3
## 8        8   0   100  
## 9        9  29.4  70.6
# For every emotion code, the percentage of its characters per SEX x SPECIES
# cell; the denominator is the emotion's total over all cells.
DATA %>%
  filter(EMOTIONS %in% emotion_values) %>%
  count(EMOTIONS, SEX, SPECIES, name = "count") %>%
  group_by(EMOTIONS) %>%
  mutate(percentage = (count / sum(count)) * 100) %>%
  ungroup() %>%
  select(EMOTIONS, SEX, SPECIES, percentage) %>%
  pivot_wider(
    names_from = c(SEX, SPECIES),
    values_from = percentage,
    values_fill = 0
  )
## # A tibble: 9 × 7
##   EMOTIONS `0_Beast` `0_Human` `0_Yordle` `1_Beast` `1_Human` `1_Yordle`
##      <dbl>     <dbl>     <dbl>      <dbl>     <dbl>     <dbl>      <dbl>
## 1        0      3.23      29.0       3.23      9.68      51.6       3.23
## 2        1     26.1       47.8       4.35      0         21.7       0   
## 3        2      7.14      28.6      14.3       0         35.7      14.3 
## 4        3      0          0         0       100          0         0   
## 5        4      0          0         0         0          0       100   
## 6        5      0        100         0         0          0         0   
## 7        7      8.33      50         8.33     16.7       16.7       0   
## 8        8      0          0         0         0        100         0   
## 9        9      5.88      23.5       0        11.8       58.8       0

PLOTS

Visualization of all previous analysis

emotion_labels <- c("Neutral", "Anger", "Happiness", "Surprise", "Disgust",
                    "Sadness", "Fear", "Content", "Alluring", "Determined")

# Count characters per emotion and sex.
# Sex is decoded straight from the SEX code (1 = Female, 0 = Male) as a factor
# with an explicit level order, instead of recoding a factor with ifelse()
# afterwards; any other SEX value becomes NA rather than silently "Female".
# `.groups = "drop"` returns an ungrouped table and avoids the dplyr message.
emotion_counts <- DATA %>%
  mutate(
    EMOTIONS = factor(EMOTIONS, levels = 0:9, labels = emotion_labels),
    Sex = factor(SEX, levels = c(1, 0), labels = c("Female", "Male"))
  ) %>%
  group_by(EMOTIONS, Sex) %>%
  summarise(count = n(), .groups = "drop")

# Plotting
# Colours are keyed by level name so the mapping cannot drift if the level
# order ever changes.
ggplot(emotion_counts, aes(x = EMOTIONS, y = count, fill = Sex)) +
  geom_col(position = "dodge") +
  labs(x = "Emotions", y = "Count") +
  scale_fill_manual(values = c(Female = "tan1", Male = "cyan4"), name = "Sex") +
  theme_classic()

species_labels <- c("Human", "Yordle", "Beast")

# Build one dodged bar chart of emotion counts by sex for each species above.
plots <- lapply(species_labels, function(species) {
  # Counts per emotion and SEX code for this species, then decode the sex.
  species_counts <- DATA %>%
    filter(SPECIES == species) %>%
    mutate(EMOTIONS = factor(EMOTIONS, levels = 0:9, labels = emotion_labels)) %>%
    count(EMOTIONS, Sex = factor(SEX), name = "count") %>%
    mutate(Sex = ifelse(Sex == 0, "Male", "Female"))

  ggplot(species_counts, aes(x = EMOTIONS, y = count, fill = Sex)) +
    geom_col(position = "dodge") +
    labs(
      x = "Emotions",
      y = "Count",
      title = paste("Distribution of Emotions by Sex -", species)
    ) +
    scale_fill_manual(
      values = c("tan1", "cyan4"),
      name = "Sex",
      labels = c("Female", "Male")
    ) +
    theme_classic()
})

plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

Checked the CHI square

# Cross-tabulate SEX against EMOTIONS and run Pearson's chi-squared test on it.
contingency_table <- with(DATA, table(SEX, EMOTIONS))

chi_square_test <- chisq.test(contingency_table)
## Warning in chisq.test(contingency_table): Chi-squared approximation may be
## incorrect
print(chi_square_test)
## 
##  Pearson's Chi-squared test
## 
## data:  contingency_table
## X-squared = 19.145, df = 8, p-value = 0.01411

The warning is probably due to low frequencies in the contingency table. The test indicates an effect of sex on emotions (p < 0.05)

Correction of CHI square

# Fisher's exact test on the same SEX x EMOTIONS table, run because
# chisq.test() warned that its approximation may be incorrect.
fisher.test(contingency_table)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  contingency_table
## p-value = 0.00479
## alternative hypothesis: two.sided

Still significant

Calculation of entropy in order to check the diversification of emotions between sexes: who showed more emotions, males or females?

# Shannon entropy of a set of category counts (plug-in / maximum-likelihood
# estimate).
#
# counts: numeric vector or table of non-negative counts, one per category.
# unit:   logarithm unit passed on to entropy::entropy(); the default "log"
#         (natural log) is what the function used before this parameter
#         existed, so existing calls return the same values.
# Returns a single numeric value. Stops with an error when `counts` is empty,
# contains NA or negative values, or sums to zero, instead of quietly
# returning 0 or NaN.
calculate_entropy <- function(counts, unit = c("log", "log2", "log10")) {
  unit <- match.arg(unit)
  if (length(counts) == 0L || anyNA(counts) || any(counts < 0) ||
      sum(counts) <= 0) {
    stop("`counts` must be non-empty, non-negative and sum to a positive value",
         call. = FALSE)
  }
  # entropy() takes the raw counts and converts them to frequencies itself,
  # so no manual normalisation is needed here.
  entropy::entropy(counts, method = "ML", unit = unit)
}

# Emotion counts for SEX code 0 (MDATA) and SEX code 1 (FDATA).
MDATA <- with(DATA, table(EMOTIONS[SEX == 0]))
FDATA <- with(DATA, table(EMOTIONS[SEX == 1]))

# One entropy value per sex, computed from those counts.
ENTROPY_M <- calculate_entropy(MDATA)
ENTROPY_F <- calculate_entropy(FDATA)

print(ENTROPY_M)
## [1] 1.57787
print(ENTROPY_F)
## [1] 1.675598

Females had more diversification in emotional expressiveness

FILTER DATA WITHOUT NEUTRAL EMOTION

0 is the neutral facial expression meaning lack of emotion

# Rows of DATA whose emotion code is not 0 (DATA has no missing EMOTIONS).
DATAF <- filter(DATA, EMOTIONS != 0)

Visualization of diversification of emotion excluding neutral

f_emotion_labels <- c("Anger", "Happiness", "Surprise", "Disgust",
                      "Sadness", "Fear", "Content", "Alluring", "Determined")

# Count characters per non-neutral emotion and sex.
# DATAF contains no rows with emotion code 0, so the factor is built from
# codes 1:9 with the non-neutral labels defined above (previously that vector
# was never used and an empty "Neutral" level was carried along).
f_emotion_counts <- DATAF %>%
  mutate(EMOTIONS = factor(EMOTIONS, levels = 1:9, labels = f_emotion_labels),
         Sex = factor(SEX, levels = 0:1, labels = c("Male", "Female"))) %>%
  group_by(EMOTIONS, Sex) %>%
  summarise(count = n(), .groups = "drop")

# Colours are keyed by level name so they cannot be attached to the wrong sex.
ggplot(f_emotion_counts, aes(x = EMOTIONS, y = count, fill = Sex)) +
  geom_col(position = "dodge") +
  labs(x = "Emotions", y = "Count", title = "Distribution of Emotions by Sex") +
  scale_fill_manual(values = c(Male = "cyan4", Female = "tan1"), name = "Sex") +
  theme_classic()

CHI square with the exclusion of the neutral facial expression

# SEX x EMOTIONS table without the neutral expression, then the chi-squared test.
TABLE_DATAF <- with(DATAF, table(SEX, EMOTIONS))

chisq.test(TABLE_DATAF)
## Warning in chisq.test(TABLE_DATAF): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  TABLE_DATAF
## X-squared = 16.029, df = 7, p-value = 0.02485

Correction

# Fisher's exact test on the non-neutral SEX x EMOTIONS table, run because
# chisq.test() warned that its approximation may be incorrect.
fisher.test(TABLE_DATAF)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  TABLE_DATAF
## p-value = 0.008591
## alternative hypothesis: two.sided

There is an effect of Sex on Emotions

Entropy with exclusion of the Neutral facial expression

# Non-neutral emotion counts for SEX code 0 (M_DATAF) and SEX code 1 (F_DATAF).
M_DATAF <- with(DATAF, table(EMOTIONS[SEX == 0]))
F_DATAF <- with(DATAF, table(EMOTIONS[SEX == 1]))

# Entropy of the non-neutral emotion distribution for each sex.
ENTROPY_M_F <- calculate_entropy(M_DATAF)
ENTROPY_F_F <- calculate_entropy(F_DATAF)


print(ENTROPY_M_F)
## [1] 1.347388
print(ENTROPY_F_F)
## [1] 1.640145

Females have more diversification in expressiveness

Multinomial MODELS

NO 0

# Multinomial model of the emotion code on SEX, fitted on DATAF (neutral
# expression excluded).
# NOTE(review): in the summary printed below, outcome levels 3, 4 and 8 have
# very large coefficients with identical intercept and SEX standard errors,
# and level 5 has a SEX standard error of about 170. The count table earlier
# in this document shows those emotions for one sex only, which suggests
# (quasi-)separation -- interpret those rows with caution.
model<- multinom(EMOTIONS ~ SEX, data = DATAF)
## # weights:  24 (14 variable)
## initial  value 147.640349 
## iter  10 value 105.940033
## iter  20 value 105.047337
## iter  30 value 105.032791
## iter  30 value 105.032791
## final  value 105.032791 
## converged
summary(model)
## Call:
## multinom(formula = EMOTIONS ~ SEX, data = DATAF)
## 
## Coefficients:
##   (Intercept)        SEX
## 2  -0.9444545  1.2808807
## 3 -49.7996998 48.1902382
## 4 -49.7996998 48.1902382
## 5  -2.8903238 -8.9900227
## 7  -0.8109446  0.5877524
## 8 -42.5926746 41.6762994
## 9  -1.2809084  2.1563309
## 
## Std. Errors:
##   (Intercept)         SEX
## 2   0.4454349   0.7357055
## 3   0.5477172   0.5477172
## 4   0.5477172   0.5477172
## 5   1.0273804 169.9418704
## 7   0.4249210   0.7940733
## 8   0.4183344   0.4183344
## 9   0.5055207   0.7340826
## 
## Residual Deviance: 210.0656 
## AIC: 238.0656

WITH 0

# Multinomial model of the emotion code on SEX, fitted on DATA (neutral
# expression included).
# NOTE(review): in the summary printed below, outcome levels 3, 4, 5 and 8
# have coefficients of magnitude 14-17, and level 5 has a SEX standard error
# of about 1e-07. The count table earlier in this document shows those
# emotions for one sex only, which suggests (quasi-)separation -- interpret
# those rows with caution.
model1<- multinom(EMOTIONS ~ SEX, data = DATA)
## # weights:  27 (16 variable)
## initial  value 224.116907 
## iter  10 value 167.645945
## iter  20 value 166.033807
## iter  30 value 166.024643
## final  value 166.024635 
## converged
summary(model1)
## Call:
## multinom(formula = EMOTIONS ~ SEX, data = DATA)
## 
## Coefficients:
##   (Intercept)         SEX
## 1   0.4924964  -1.8787892
## 2  -0.4519629  -0.5978621
## 3 -16.9508633  13.9551360
## 4 -16.9508633  13.9551360
## 5  -2.3978173 -16.6568526
## 7  -0.3184110  -1.2910066
## 8 -17.8515513  15.5489481
## 9  -0.7884348   0.2776129
## 
## Std. Errors:
##   (Intercept)          SEX
## 1   0.3827095 6.296560e-01
## 2   0.4834956 6.531660e-01
## 3   0.5123468 5.123466e-01
## 4   0.5123468 5.123466e-01
## 5   1.0444395 1.351889e-07
## 7   0.4646593 7.182644e-01
## 8   0.3708133 3.708133e-01
## 9   0.5393614 6.513401e-01
## 
## Residual Deviance: 332.0493 
## AIC: 364.0493

WITH 0

# Linear model of the numeric emotion code on SEX (neutral included).
# NOTE(review): EMOTIONS holds codes 0-9 that elsewhere in this file are
# mapped to category names via emotion_labels, so lm() treats a nominal
# variable as a continuous response and the fit depends on the arbitrary
# numbering of the emotions -- confirm that this is intended.
model2<- lm(EMOTIONS ~ SEX, data = DATA)

summary(model2)
## 
## Call:
## lm(formula = EMOTIONS ~ SEX, data = DATA)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.423 -2.760 -1.760  4.240  6.240 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.7600     0.4963   5.561 2.24e-07 ***
## SEX           0.6631     0.6952   0.954    0.342    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.51 on 100 degrees of freedom
## Multiple R-squared:  0.009016,   Adjusted R-squared:  -0.0008936 
## F-statistic: 0.9098 on 1 and 100 DF,  p-value: 0.3425

NO 0

# Linear model of the numeric emotion code on SEX (neutral excluded).
# NOTE(review): EMOTIONS holds codes that elsewhere in this file are mapped to
# category names via emotion_labels, so lm() treats a nominal variable as a
# continuous response and the fit depends on the arbitrary numbering of the
# emotions -- confirm that this is intended.
model3<- lm(EMOTIONS ~ SEX, data = DATAF)

summary(model3)
## 
## Call:
## lm(formula = EMOTIONS ~ SEX, data = DATAF)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.562 -2.538 -1.538  3.438  5.462 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.5385     0.5257   6.731 4.06e-09 ***
## SEX           2.0240     0.7830   2.585   0.0119 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.283 on 69 degrees of freedom
## Multiple R-squared:  0.08829,    Adjusted R-squared:  0.07508 
## F-statistic: 6.682 on 1 and 69 DF,  p-value: 0.01185

When the neutral expression is excluded from the analysis there is a significant effect of SEX on Emotions

IMPORTANT MODELS

WITH 0

# Linear model of the emotion code on SEX, ETHICAL, MORAL and all of their
# interactions (neutral included).
# NOTE(review): ETHICAL and MORAL enter as numeric 0/1/2 codes (one slope
# each in the summary below), although later in this file they are decoded
# into three named categories; EMOTIONS is likewise a nominal code used as a
# continuous response -- confirm that this is intended.
model4 <- lm(EMOTIONS ~ SEX * ETHICAL * MORAL, data = DATA)

summary(model4)
## 
## Call:
## lm(formula = EMOTIONS ~ SEX * ETHICAL * MORAL, data = DATA)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.140 -2.447 -1.160  2.860  7.956 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        1.42659    1.64760   0.866   0.3888  
## SEX                5.22546    2.92633   1.786   0.0774 .
## ETHICAL            0.65518    0.95082   0.689   0.4925  
## MORAL              0.04851    2.09171   0.023   0.9815  
## SEX:ETHICAL       -2.81255    1.65520  -1.699   0.0926 .
## SEX:MORAL         -5.65617    3.28759  -1.720   0.0886 .
## ETHICAL:MORAL      0.35393    1.18717   0.298   0.7663  
## SEX:ETHICAL:MORAL  3.40046    1.80996   1.879   0.0634 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.352 on 94 degrees of freedom
## Multiple R-squared:  0.1504, Adjusted R-squared:  0.08717 
## F-statistic: 2.378 on 7 and 94 DF,  p-value: 0.02776

NO 0

# Linear model of the emotion code on SEX, ETHICAL, MORAL and all of their
# interactions (neutral excluded).
# NOTE(review): ETHICAL and MORAL enter as numeric 0/1/2 codes (one slope
# each in the summary below), although later in this file they are decoded
# into three named categories; EMOTIONS is likewise a nominal code used as a
# continuous response -- confirm that this is intended.
model5 <- lm(EMOTIONS ~ SEX * ETHICAL * MORAL, data = DATAF)

summary(model5)
## 
## Call:
## lm(formula = EMOTIONS ~ SEX * ETHICAL * MORAL, data = DATAF)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.810 -2.515 -1.230  2.768  6.171 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         2.4283     1.9069   1.273   0.2075  
## SEX                 7.9366     3.3128   2.396   0.0196 *
## ETHICAL             0.4011     1.0836   0.370   0.7125  
## MORAL               0.5661     2.3999   0.236   0.8143  
## SEX:ETHICAL        -3.3227     1.8621  -1.784   0.0792 .
## SEX:MORAL          -6.9802     3.7356  -1.869   0.0663 .
## ETHICAL:MORAL       0.1199     1.3508   0.089   0.9295  
## SEX:ETHICAL:MORAL   3.6591     2.0377   1.796   0.0773 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.238 on 63 degrees of freedom
## Multiple R-squared:  0.1899, Adjusted R-squared:  0.09994 
## F-statistic:  2.11 on 7 and 63 DF,  p-value: 0.05518

There are tendencies for an effect of the two alignments on emotion, also when taking SEX into account, meaning that personality and sex tend to be related to emotional expression and that this relationship differs between sexes

POST HOC NO 0

# Estimated marginal means from model5 over SEX, ETHICAL and MORAL.
# NOTE(review): the table printed below has only two rows, with ETHICAL fixed
# at 1.56 and MORAL at 0.817, i.e. both are held at a single averaged value
# rather than compared per alignment category -- confirm this is the intended
# post hoc comparison.
emmeans(model5,~ SEX * ETHICAL * MORAL)
##  SEX ETHICAL MORAL emmean    SE df lower.CL upper.CL
##    0    1.56 0.817   3.67 0.535 63     2.60     4.74
##    1    1.56 0.817   5.38 0.596 63     4.19     6.57
## 
## Confidence level used: 0.95

Visualization of the effect of personality divided by sex

ethical_labels <- c("Neutral", "Lawful", "Chaotic")
moral_labels <- c("Neutral", "Good", "Evil")
emotion_labels <- c(
  "Neutral", "Anger", "Happiness", "Surprise", "Disgust",
  "Sadness", "Fear", "Content", "Alluring", "Determined"
)

# Decode the numeric alignment codes and the SEX code into readable labels.
data_preprocessed <- mutate(
  DATA,
  ETHICAL = factor(ETHICAL, levels = 0:2, labels = ethical_labels),
  MORAL = factor(MORAL, levels = 0:2, labels = moral_labels),
  Sex = ifelse(SEX == 0, "Male", "Female")
)

# Number of characters per alignment category and sex.
ethical_summary <- count(data_preprocessed, ETHICAL, Sex, name = "count")
moral_summary <- count(data_preprocessed, MORAL, Sex, name = "count")

# Ethical alignment on the x axis, one dodged bar per sex.
ethical_plot <- ggplot(ethical_summary, aes(x = ETHICAL, y = count, fill = Sex)) +
  geom_col(position = "dodge") +
  labs(x = "Ethical Alignment", y = "Count") +
  scale_fill_manual(
    values = c("tan1", "cyan4"),
    name = "Sex",
    labels = c("Female", "Male")
  ) +
  theme_classic()

# Moral alignment on the x axis, one dodged bar per sex.
moral_plot <- ggplot(moral_summary, aes(x = MORAL, y = count, fill = Sex)) +
  geom_col(position = "dodge") +
  labs(x = "Moral Alignment", y = "Count") +
  scale_fill_manual(
    values = c("tan1", "cyan4"),
    name = "Sex",
    labels = c("Female", "Male")
  ) +
  theme_classic()

# Show both charts side by side.
grid.arrange(ethical_plot, moral_plot, ncol = 2)

Different type of visualization (less color-blind friendly)

data_preprocessed <- DATA %>%
  mutate(
    ETHICAL = factor(ETHICAL, levels = 0:2, labels = ethical_labels),
    MORAL = factor(MORAL, levels = 0:2, labels = moral_labels),
    Sex = ifelse(SEX == 0, "Male", "Female")
  )

# Colours keyed by alignment name. The previous version passed unnamed colours
# plus a `labels` vector in a different order from the factor levels
# (Neutral, Good/Lawful, Evil/Chaotic), so the legend named the Neutral bars
# "Good"/"Lawful" and the Good/Lawful bars "Neutral".
moral_colours <- c(Good = "lightgreen", Neutral = "gray", Evil = "orange")
ethical_colours <- c(Lawful = "lightgreen", Neutral = "gray", Chaotic = "orange")

# Counts per sex and alignment; the factor is re-levelled to the colour order
# so bars and legend both read Good/Lawful, Neutral, Evil/Chaotic.
moral_summary <- data_preprocessed %>%
  group_by(Sex, MORAL) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(MORAL = factor(MORAL, levels = names(moral_colours)))

ethical_summary <- data_preprocessed %>%
  group_by(Sex, ETHICAL) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(ETHICAL = factor(ETHICAL, levels = names(ethical_colours)))


moral_plot <- ggplot(moral_summary, aes(x = Sex, y = count, fill = MORAL)) +
  geom_col(position = "dodge") +
  labs(x = "Sex", y = "Count", title = "Distribution of Moral Alignments by Sex") +
  scale_fill_manual(values = moral_colours, name = "Moral Alignment") +
  theme_classic()


ethical_plot <- ggplot(ethical_summary, aes(x = Sex, y = count, fill = ETHICAL)) +
  geom_col(position = "dodge") +
  labs(x = "Sex", y = "Count", title = "Distribution of Ethical Alignments by Sex") +
  scale_fill_manual(values = ethical_colours, name = "Ethical Alignment") +
  theme_classic()


grid.arrange(moral_plot, ethical_plot, ncol = 2)

Tables showing diversification of emotions and personality

# Decode the numeric alignment codes and the SEX code into readable labels.
data_preprocessed <- mutate(
  DATA,
  ETHICAL = factor(ETHICAL, levels = 0:2, labels = ethical_labels),
  MORAL = factor(MORAL, levels = 0:2, labels = moral_labels),
  Sex = ifelse(SEX == 0, "Male", "Female")
)

# Moral alignment counts per sex, with each count as a percentage of that sex.
data_preprocessed %>%
  count(Sex, MORAL, name = "count") %>%
  group_by(Sex) %>%
  mutate(percentage = count / sum(count) * 100)
## # A tibble: 6 × 4
## # Groups:   Sex [2]
##   Sex    MORAL   count percentage
##   <chr>  <fct>   <int>      <dbl>
## 1 Female Neutral    16       30.8
## 2 Female Good       25       48.1
## 3 Female Evil       11       21.2
## 4 Male   Neutral    24       48  
## 5 Male   Good       20       40  
## 6 Male   Evil        6       12
# Ethical alignment counts per sex, with each count as a percentage of that sex.
data_preprocessed %>%
  count(Sex, ETHICAL, name = "count") %>%
  group_by(Sex) %>%
  mutate(percentage = count / sum(count) * 100)
## # A tibble: 6 × 4
## # Groups:   Sex [2]
##   Sex    ETHICAL count percentage
##   <chr>  <fct>   <int>      <dbl>
## 1 Female Neutral     6       11.5
## 2 Female Lawful     18       34.6
## 3 Female Chaotic    28       53.8
## 4 Male   Neutral     5       10  
## 5 Male   Lawful     16       32  
## 6 Male   Chaotic    29       58
# Same decoding as before, now also turning the emotion code into a labelled
# factor.
data_preprocessed <- mutate(
  DATA,
  ETHICAL = factor(ETHICAL, levels = 0:2, labels = ethical_labels),
  MORAL = factor(MORAL, levels = 0:2, labels = moral_labels),
  EMOTIONS = factor(EMOTIONS, levels = 0:9, labels = emotion_labels),
  Sex = ifelse(SEX == 0, "Male", "Female")
)

# Emotion counts per sex, with each count as a percentage of that sex.
data_preprocessed %>%
  count(Sex, EMOTIONS, name = "count") %>%
  group_by(Sex) %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup() %>%
  arrange(Sex, EMOTIONS)
## # A tibble: 14 × 4
##    Sex    EMOTIONS   count percentage
##    <chr>  <fct>      <int>      <dbl>
##  1 Female Neutral       20      38.5 
##  2 Female Anger          5       9.62
##  3 Female Happiness      7      13.5 
##  4 Female Surprise       1       1.92
##  5 Female Disgust        1       1.92
##  6 Female Content        4       7.69
##  7 Female Alluring       2       3.85
##  8 Female Determined    12      23.1 
##  9 Male   Neutral       11      22   
## 10 Male   Anger         18      36   
## 11 Male   Happiness      7      14   
## 12 Male   Sadness        1       2   
## 13 Male   Content        8      16   
## 14 Male   Determined     5      10