analyze word associations

Notes
load needed pre-processed data

## global variables:
# only if necessary to limit running time

# set the directory containing this script as the working directory
# setwd(dirname(rstudioapi::getSourceEditorContext()$path))

# load packages (p_load installs any that are missing)
require(pacman)
p_load('xlsx', 'associatoR', 'DT', 'igraph')

# load data
setwd("../generated data")
associationData <- xlsx::read.xlsx2(file = "word_associations_translated.xlsx", sheetIndex = 1) # alternatively: "word_associations.xlsx"
table(associationData$cue)
muscular normal weight overweight underweight
50 50 50 50
dim(associationData)
[1] 200 6
DT::datatable(data = associationData)
get wordclouds
<- associationData[associationData$cue == "muscular", ]
tmp
<- table(tmp$response)
word_frequencies <- as.data.frame(word_frequencies)
word_frequencies_df
library(wordcloud)
Lade nötiges Paket: RColorBrewer
# Create the word cloud
wordcloud(words = word_frequencies_df$Var1,
freq = word_frequencies_df$Freq,
min.freq = 1,
max.words = 100,
random.order = FALSE,
colors = brewer.pal(8, "Dark2"))
<- associationData[associationData$cue == "normal weight", ]
tmp
<- table(tmp$response)
word_frequencies <- as.data.frame(word_frequencies)
word_frequencies_df
library(wordcloud)
# Create the word cloud
wordcloud(words = word_frequencies_df$Var1,
freq = word_frequencies_df$Freq,
min.freq = 1,
max.words = 100,
random.order = FALSE,
colors = brewer.pal(8, "Dark2"))
<- associationData[associationData$cue == "overweight", ]
tmp
<- table(tmp$response)
word_frequencies <- as.data.frame(word_frequencies)
word_frequencies_df
library(wordcloud)
# Create the word cloud
wordcloud(words = word_frequencies_df$Var1,
freq = word_frequencies_df$Freq,
min.freq = 1,
max.words = 100,
random.order = FALSE,
colors = brewer.pal(8, "Dark2"))
<- associationData[associationData$cue == "underweight", ]
tmp
<- table(tmp$response)
word_frequencies <- as.data.frame(word_frequencies)
word_frequencies_df
library(wordcloud)
# Create the word cloud
wordcloud(words = word_frequencies_df$Var1,
freq = word_frequencies_df$Freq,
min.freq = 1,
max.words = 100,
random.order = FALSE,
colors = brewer.pal(8, "Dark2"))
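The four word-cloud blocks above differ only in the cue, so they can be collapsed into a single loop. A minimal sketch, assuming associationData, wordcloud, and RColorBrewer are loaded as above (current_cue is just the loop variable):

# one word cloud per cue
for (current_cue in unique(associationData$cue)) {
  tmp <- associationData[associationData$cue == current_cue, ]
  word_frequencies_df <- as.data.frame(table(tmp$response))
  wordcloud(words = word_frequencies_df$Var1,
            freq = word_frequencies_df$Freq,
            min.freq = 1,
            max.words = 100,
            random.order = FALSE,
            colors = brewer.pal(8, "Dark2"))
}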
get semantic networks
<- associationData[associationData$cue == "overweight", ]
tmp $response[tmp$participant_id == unique(tmp$participant_id)[1]] tmp
[1] "Health risks" "Obesity"
[3] "Diet" "Lack of exercise"
[5] "Loss of self-confidence"
c("participant_id", "response")] tmp[
participant_id response
1 Run_1_1 Health risks
2 Run_1_1 Obesity
3 Run_1_1 Diet
4 Run_1_1 Lack of exercise
5 Run_1_1 Loss of self-confidence
21 Run_2_1 Health risks
22 Run_2_1 Diet
23 Run_2_1 Overweight
24 Run_2_1 Obesity
25 Run_2_1 Health problems
41 Run_3_1 Health problems
42 Run_3_1 Fat deposits
43 Run_3_1 Difficulty walking
44 Run_3_1 Unattractive appearance
45 Run_3_1 Eating habits
61 Run_4_1 Health risk
62 Run_4_1 Obesity
63 Run_4_1 Diet
64 Run_4_1 Overweight
65 Run_4_1 Nutrition
81 Run_5_1 Health risks
82 Run_5_1 Overweight
83 Run_5_1 Obesity
84 Run_5_1 Difficulties
85 Run_5_1 Loss of self-confidence
101 Run_6_1 Health problems
102 Run_6_1 Diet
103 Run_6_1 Overweight
104 Run_6_1 Obesity
105 Run_6_1 Dietary change
121 Run_7_1 Health problems
122 Run_7_1 Diet
123 Run_7_1 Overweight
124 Run_7_1 Dietary change
125 Run_7_1 Loss of self-confidence
141 Run_8_1 Health risks
142 Run_8_1 Obesity
143 Run_8_1 Diet
144 Run_8_1 Overweight
145 Run_8_1 Eating habits
161 Run_9_1 Health problems
162 Run_9_1 Diet
163 Run_9_1 Obesity
164 Run_9_1 Difficulty in sports
165 Run_9_1 Loss of self-confidence
181 Run_10_1 Health risk
182 Run_10_1 Diet
183 Run_10_1 Obesity
184 Run_10_1 Overweight
185 Run_10_1 Dietary change
# Group responses by participant
library(dplyr)
Warning: package 'dplyr' was built under R version 4.3.2
Attaching package: 'dplyr'
The following objects are masked from 'package:igraph':
    as_data_frame, groups, union
The following objects are masked from 'package:stats':
    filter, lag
The following objects are masked from 'package:base':
    intersect, setdiff, setequal, union
grouped_responses <- tmp %>%
  group_by(participant_id) %>%
  summarise(associations = list(response)) %>%
  ungroup()
# Function to create edges from the grouped responses
create_edges <- function(association, distance = 2) {
  edges <- data.frame()
  n <- length(association)
  for (i in 1:n) {
    for (d in 1:distance) {
      if (i + d <= n) {
        edges <- rbind(edges, data.frame(from = association[i], to = association[i + d]))
      }
    }
  }
  return(edges)
}
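As a quick sanity check, this is what create_edges returns for a toy three-word input (hypothetical data, default distance = 2):

create_edges(c("a", "b", "c"))
#   from to
# 1    a  b
# 2    a  c
# 3    b  c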
# Apply the function to create edges for each participant
all_edges <- do.call(rbind, lapply(grouped_responses$associations, create_edges))
# Calculate word frequencies across the entire dataset
word_frequencies <- tmp %>%
  count(response, name = "frequency")
# Calculate the edge weights (how often each link appears between two words)
edge_weights <- all_edges %>%
  group_by(from, to) %>%
  summarise(weight = n(), .groups = "drop")
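Note that group_by(from, to) counts ordered pairs, so reversed pairs such as A–B and B–A (which occur when participants name the same words in different orders) are tallied separately, and graph_from_data_frame(directed = FALSE) below keeps them as parallel edges rather than summing their weights. If one count per unordered pair is wanted instead, a possible sketch (pmin/pmax put the two words in alphabetical order before counting):

edge_weights <- all_edges %>%
  mutate(a = pmin(from, to), b = pmax(from, to)) %>%  # canonical word order
  count(a, b, name = "weight") %>%                    # one row per unordered pair
  rename(from = a, to = b)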
# Create the graph from the edge list with edge weights
g <- graph_from_data_frame(edge_weights, directed = FALSE)
# Get the list of unique words in the graph (vertices)
vertices <- V(g)$name
# Map word frequencies to vertex sizes
vertex_sizes <- sapply(vertices, function(word) {
  freq <- word_frequencies %>%
    filter(response == word) %>%
    pull(frequency)
  if (length(freq) == 0) {
    return(1) # default size if the word is not found (shouldn't happen)
  } else {
    return(freq)
  }
})
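The same frequency lookup can be done in one vectorized step with base R's match(); a sketch assuming the objects defined above (a word absent from word_frequencies would yield NA here rather than the default 1):

vertex_sizes <- word_frequencies$frequency[match(vertices, word_frequencies$response)]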
# Extract edge weights to adjust the edge widths
edge_widths <- E(g)$weight
# Plot the network with vertex sizes proportional to word frequencies and edge widths proportional to edge weights
plot(g,
     vertex.label.color = "black",
     vertex.size = vertex_sizes * 2, # scale vertex sizes by word frequency
     edge.width = edge_widths * 1,   # scale edge widths by weight (number of links)
     edge.arrow.size = 0.5)
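Because igraph picks a randomized layout each time, the network figure changes between runs; fixing the RNG seed before plotting makes it reproducible. A minimal sketch (seed value arbitrary):

set.seed(42) # arbitrary seed; fixes the randomized graph layout
plot(g,
     vertex.label.color = "black",
     vertex.size = vertex_sizes * 2,
     edge.width = edge_widths,
     edge.arrow.size = 0.5)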
sort(table(tmp$response))
Difficulties Difficulty in sports Difficulty walking
1 1 1
Fat deposits Lack of exercise Nutrition
1 1 1
Unattractive appearance Eating habits Health risk
1 2 2
Dietary change Health risks Loss of self-confidence
3 4 4
Health problems Overweight Diet
5 7 8
Obesity
8
mat <- as.matrix(as_adjacency_matrix(g))
mat[rowSums(mat) >= 5, colSums(mat) >= 5]
Diet Health problems Obesity Overweight
Diet 0 1 2 1
Health problems 1 0 2 2
Obesity 2 2 0 2
Overweight 1 2 2 0
Loss of self-confidence 1 0 1 1
Loss of self-confidence
Diet 1
Health problems 0
Obesity 1
Overweight 1
Loss of self-confidence 0
OLD (earlier version of the associatoR analysis, kept for reference)
ar_obj <- ar_import(associationData,
                    participant = participant_id,
                    cue = cue,
                    response = response,
                    response_vars = c(response_position))
str(ar_obj)
# normalize responses
ar_obj <- ar_normalize(ar_obj,
                       case = "most_frequent",
                       punct = "all",
                       whitespace = "squish",
                       process_cues = TRUE)
ar_obj <- ar_set_targets(ar_obj, targets = "cues")
ar_obj <- ar_summarize_targets(ar_obj,
                               response_var = response_position,
                               fun = mean)
ar_obj$targets
ar_obj <- ar_embed_targets(ar_obj,
                           method = "ppmi-svd",
                           n_dim = 100)
projection <- ar_project_embedding(ar_obj, method = "umap")
ar_plot_wordcloud(
  ar_obj,
  facet_col = cues,
  facet_row = NULL,
  color_by = cues,
  top_n = 20
)
# import the association data, this time including participant variables
ar_obj <- ar_import(data = associationData,
                    participant = participant_id,
                    cue = cue,
                    response = response,
                    participant_vars = c(gender, agegroup),
                    response_vars = c(response_position))
# export wordlist
# associatoR::ar_wordlist_export(ar_obj,
# file = "outputs/associationData_wordlist.xlsx")
# import wordlist
# ar_obj <- associatoR::ar_wordlist_import(ar_obj,
# file = "associationData_wordlist.csv",
# process_cues = TRUE,
# na = c("NA"))
# ar_obj <- ar_count_targets(ar_obj) # to get frequencies of target
# ar_obj$targets
ar_obj <- ar_characterize_targets(
  ar_obj,
  characteristics = c("valence", "arousal", "dominance"),
  case_sensitive = FALSE
  # "word_frequency", "concreteness"
)
ar_obj$targets
ar_obj <- ar_summarize_targets(ar_obj,
                               response_var = response_position,
                               fun = mean)
ar_obj$targets
ar_obj <- ar_correlate_targets(
  ar_obj,
  participant_vars = c(gender, agegroup),
  metric = "auto"
)
ar_obj$targets$target[order(ar_obj$targets$gender_corr)[1:5]] # mentioned more frequently by women
ar_obj$targets$target[order(ar_obj$targets$gender_corr, decreasing = TRUE)[1:5]] # mentioned more frequently by men
#######################################
tmp <- ar_cross_targets(ar_obj,
                        participant_vars = c(gender), # agegroup
                        target_var = target,
                        normalize = FALSE)
$target == "empathy",]
tmp[tmp$target == "Elon Musk",]
tmp[tmp#######################################
ar_obj <- ar_cluster_targets(ar_obj,
                             method = "louvain",
                             similarity = "cosine",
                             resolution = 1.1)
table(ar_obj$targets$cluster)
# ar_plot_embedding(
# ar_obj,
# color_by = cluster,
# color_set = "G",
# alpha = 0.5,
# proportion_labels = 1
# )
ar_import(associationData,
          participant = participant_id,
          cue = cue,
          response = response,
          participant_vars = c(gender, agegroup),
          response_vars = c(response_position, response_level)) %>%
  ar_set_targets("cues") %>%
  ar_embed_targets() %>%
  ar_cluster_targets() %>%
  ar_project_embedding() %>%
  ar_plot_embedding(color_by = cluster,
                    proportion_labels = .5)
ar_plot_wordcloud(
  ar_obj,
  facet_col = cluster,
  facet_row = NULL,
  color_by = cluster,
  top_n = 20
)
# ar_cluster_stability(ar_obj)
ar_cross_targets(ar_obj,
                 participant_vars = c(gender), # agegroup
                 target_var = cluster,
                 normalize = TRUE)
ar_compare_targets(ar_obj,
                   participant_vars = c(gender, agegroup),
                   target_var = valence,
                   fun = median, na.rm = TRUE)
ar_compare_embeddings(ar_obj,
                      participant_vars = c(gender, agegroup),
                      type = "triangle",
                      intersection = "pair")
ar_import(associationData,
          participant = participant_id,
          cue = cue,
          response = response,
          response_vars = c(response_position)) %>%
  ar_set_targets("cues") %>%
  ar_embed_targets(method = "counts") %>%
  ar_cluster_targets(method = "hclust", k = 3) %>%
  ar_project_embedding(method = "mds") %>%
  ar_plot_embedding(color_by = cluster,
                    proportion_labels = .5)
ar_plot_wordcloud(
  ar_obj,
  facet_col = cluster,
  facet_row = NULL,
  color_by = cluster,
  top_n = 20
)