Characterize structure stratified by age, gender, deprivation status
# Require the igraph package, used for plotting and "reachableness"
# Require tidyverse package for data manipulation
# Require kableExtra (just for table layout in this rMarkdown file)
library(igraph)
library(tidyverse)
library(kableExtra)
# custom code for plots
graph_plot<-function(grp){
g <- graph(c(rbind(grp[,"cause"],grp[,"effect"])),directed=TRUE)
plot(g,
edge.color = grp$edge.color,
vertex.size = 8,
vertex.label.cex = 0.8,
vertex.label.color = "black",
edge.arrow.size = 0.4,
layout=layout_with_fr(g)
)
}
# Custom code to expand an edgelist to include "downstream" nodes that are reachable in the directed graph
expand_edgelist_fast<-function(a_model){
require(igraph)
g <- graph(c(rbind(a_model[,'cause'],a_model[,'effect'])),directed=TRUE)
reachables<-data.frame("cause"=NA,"effect"=NA)
for(each_name in V(g)$name){
outs<-subcomponent(g, each_name, mode = "out")[-1]
ins<-subcomponent(g, each_name, mode = "in")[-1]
subs<-if(length(intersect(outs,ins))>0) c(names(outs),each_name) else names(outs) #Dec 16: changed from >1 to >0, I don't know why this was set this way to start with but is fixed now -- this corrects the one group (group 3 aka group 7 that seemingly had zero loops)
a_reachable<-data.frame("cause"=rep(each_name,length(subs)),"effect"=subs)
reachables<-rbind(reachables,a_reachable)
}
reachables[-1,]
}
# custom code to plot one subcomponent #NOTE DONE YET
subcomponent_graph_plot<-function(grp,vertex_sub){
g<-graph(c(rbind(grp[,"cause"],grp[,"effect"])),directed=TRUE)
s<-subcomponent(g,vertex_sub,mode="out") #this is JUST calling these vertex names. instead, I want ot be reducting the graph to just these nodes.
#TO FIX
sg<-
plot(g,
edge.color = grp$edge.color,
vertex.size = 8,
vertex.label.cex = 0.8,
vertex.label.color = "black",
edge.arrow.size = 0.1,
layout=layout_with_fr(g)
)
}
Load, characterize, and visualize participation and demographic data.
-Note: group numbering used here is sequential, so may not match the numbering used in the folder structure – be sure to check against model name).
-Note: figures show distribution “violins” and also points for each person. Points are shown with a small amount of x and y axis noise (jitter) just to be visible / not overplotted.
#load demographic data
participants <- read.csv("~/Downloads/ARCHESIPadDataEntry_DATA_2023-08-25_1735.csv", header=TRUE)
demo <- participants %>%
filter(!is.na(dob_year)&!is.na(gender)) %>%
group_by(record_id) %>%
summarize(
age=unique(dob_year),
gender=unique(gender))
#do a couple of corrections based on comparison with the screening data
#correct two participant ages that were missing the leading '19' in the datafile
demo[demo$age<1900,'age']<-demo[demo$age<1900,'age']+1900
#add two missing participants with missing data in the the datafile
demo <- rbind(demo,c(5198,1973,1))
demo <- rbind(demo,c(5201,1957,2))
#load deprivation status and merge to demographic data
deprivation <- read.csv("~/Downloads/deprivation status.csv", header=TRUE)
demo <- merge(demo,deprivation[,c('record_id','Deprivation.status')],all=T) #158 participants
#load participation data by group and merge to demographic data
group_participation <- read.csv("~/Downloads/library of models and participants.csv", header=TRUE)
demo <- merge(demo,group_participation,by.x="record_id",by.y="Members") # reduces to 59 participants
#display table of aggregate characteristics
demo %>% group_by(Group) %>% summarize(n_people=n(),birth_year=mean(age), gender=mean(gender), deprivation=mean(Deprivation.status),model=unique(mdl.file.name)) %>% kbl(digits=2) %>% kable_styling()
Group | n_people | birth_year | gender | deprivation | model |
---|---|---|---|---|---|
1 | 8 | 1963.75 | 1.75 | 0.12 | ./CLD-17-03-23-Standardized-v2_Ram_Updated.mdl |
2 | 11 | 1962.27 | 1.91 | 0.27 | ./CLD-18-3-23_LoopsColored_ActionIdeas-JFT_RAM_Updated.mdl |
3 | 6 | 1953.17 | 1.83 | 0.33 | ./CLD.3.19.2023_MM-AW_RAM_Updated.mdl |
4 | 6 | 1954.17 | 2.00 | 0.00 | ./CLD-19-03-23-G2_JF_RAM_Updated.mdl |
5 | 8 | 1955.88 | 1.88 | 0.25 | ./ARCHES GMB Vensim Number 01 03.24.2023_Written by WZ SL_JF_RAM_Updated.mdl |
6 | 7 | 1953.43 | 1.86 | 0.57 | ./ARCHES GMB Vensim Number 02 03.24.2023_Written by CC_Meena&Alexis_Ram_Updated.mdl |
7 | 7 | 1953.14 | 1.86 | 0.43 | ./ARCHES GMB Vensim Number 01 03.25.2023_Written by WZ SL RAM_Updated.mdl |
8 | 6 | 1962.17 | 2.00 | 0.17 | ./GMB-JFT-NR-03-25-23-v2_Ram_Updated.mdl |
#working tabel of aggregate characteristics
group_charatacteristics <- demo %>%
group_by(Group) %>%
summarize(
mean_age=mean(2023-age),
mean_gender=mean(gender),
mean_deprivation=mean(Deprivation.status),
mdl.file.name=unique(mdl.file.name))
#summarize and visualize demographic data
demo %>% ggplot(aes(x=as.factor(Group),y=age,group=as.factor(Group)))+geom_violin()+geom_jitter(width=0.2,height=0.01,pch=1)+scale_x_discrete("Group")+scale_y_continuous("birth year")#some range of differentiation
demo %>% ggplot(aes(x=as.factor(Group),y=gender,group=as.factor(Group)))+geom_violin()+geom_jitter(width=0.2,height=0.01,pch=1)+scale_x_discrete("Group")
demo %>% ggplot(aes(x=as.factor(Group),y=Deprivation.status,group=as.factor(Group)))+geom_violin()+geom_jitter(width=0.2,height=0.01,pch=1)+scale_x_discrete("Group")+scale_y_continuous("deprivation")
Load, characterize and visualize models from edgelists.
-In example plots, light blue indicates “increase” and light pink “decrease”. Dark colors are Action Ideas.
-These are a bit “messy” at the moment but for now it’s just a way to check that the data looks right (i.e. negatives and postives make sense, all the edges are there, AIs are distinguished, etc.)
#Loading and testing edgelists
ARCHES <- read.csv("~/Downloads/edgelist 1.2/All vensim Edgelist1.2.csv", header=TRUE)
ARCHES <- ARCHES %>%
mutate(
cause=trimws(X1st.Variable),
effect=trimws(Second.Variable),
cause_isAI=str_starts(cause,"AI "),
effect_isAI=str_starts(effect,"AI "),
polarity=case_when(
Polarity=="Increases" ~ 1,
Polarity=="Decreases" ~ -1),
edge.color=case_when(
(cause_isAI==TRUE|effect_isAI==TRUE)&polarity==1 ~ "dark blue",
(cause_isAI==FALSE&effect_isAI==FALSE)&polarity==1 ~ "light blue",
(cause_isAI==TRUE|effect_isAI==TRUE)&polarity==-1 ~ "dark red",
(cause_isAI==FALSE&effect_isAI==FALSE)&polarity==-1 ~ "pink"))
# Define groups
ARCHES_grp <- merge(ARCHES,group_charatacteristics,by="mdl.file.name")
ARCHES_grp$model=ARCHES_grp$Group
ARCHES_grp<-ARCHES_grp %>% arrange(model)
grp1<-ARCHES_grp %>% filter(Group==1)
grp2<-ARCHES_grp %>% filter(Group==2)
grp3<-ARCHES_grp %>% filter(Group==3)
grp4<-ARCHES_grp %>% filter(Group==4)
grp5<-ARCHES_grp %>% filter(Group==5)
grp6<-ARCHES_grp %>% filter(Group==6)
grp7<-ARCHES_grp %>% filter(Group==7)
grp8<-ARCHES_grp %>% filter(Group==8)
# summary stats
ARCHES_grp %>% group_by(Group) %>%
summarize (number_edges=n(),
number_unique_nodes=length(unique(c(cause,effect)))) %>% kbl(digits=2) %>% kable_styling()
Group | number_edges | number_unique_nodes |
---|---|---|
1 | 88 | 48 |
2 | 121 | 86 |
3 | 100 | 82 |
4 | 115 | 75 |
5 | 129 | 72 |
6 | 132 | 84 |
7 | 108 | 62 |
8 | 154 | 82 |
#Group 1
graph_plot(grp1)
#Group 2
graph_plot(grp2)
#Group 3
graph_plot(grp3)
#Group 4
graph_plot(grp4)
#Group 5
graph_plot(grp5)
#Group 6
graph_plot(grp6)
#Group 7
graph_plot(grp7)
#Group 8
graph_plot(grp8)
What are common nodes and common direct links across groups?
Some nodes are widely shared, with Crime, Exercise, Access to quality education, Access to quality healthcare, Family caregiving, Income, and Stress being shared by 6 or 7 groups, and Stress connected to many more edges than other nodes, followed by Income, Access to quality healthcare, Crime, and Dementia
ARCHES_grp_cause_as_node<-ARCHES_grp %>% mutate(node=cause)
ARCHES_grp_effect_as_node<-ARCHES_grp %>% mutate(node=effect)
ARCHES_grp_nodes<-rbind(ARCHES_grp_cause_as_node,ARCHES_grp_effect_as_node)
ARCHES_grp_nodes %>%
group_by(node) %>%
summarize(n=n(),n_groups=length(unique(Group)),groups=paste(unique(model),collapse=", ")) -> by_node
#nodes shared by 6 or more groups, sorted by number of groups
by_node %>% filter(n_groups>5) %>% arrange(desc(n_groups)) %>% kbl(digits=2) %>% kable_styling()
node | n | n_groups | groups |
---|---|---|---|
Crime | 37 | 7 | 2, 3, 4, 5, 6, 7, 8 |
Exercise | 27 | 7 | 1, 2, 3, 4, 5, 7, 8 |
Access to quality education | 21 | 6 | 2, 3, 4, 5, 6, 8 |
Access to quality healthcare | 41 | 6 | 1, 3, 4, 6, 8, 7 |
Family caregiving | 31 | 6 | 2, 4, 5, 6, 7, 8 |
Income | 47 | 6 | 1, 2, 3, 4, 6, 8 |
Stress | 59 | 6 | 3, 4, 5, 6, 7, 8 |
#nodes reported 35 or more times, sorted by number of edges with node as cause or effect
by_node %>% filter(n>35) %>% arrange(desc(n)) %>% kbl(digits=2) %>% kable_styling()
node | n | n_groups | groups |
---|---|---|---|
Stress | 59 | 6 | 3, 4, 5, 6, 7, 8 |
Income | 47 | 6 | 1, 2, 3, 4, 6, 8 |
Access to quality healthcare | 41 | 6 | 1, 3, 4, 6, 8, 7 |
Crime | 37 | 7 | 2, 3, 4, 5, 6, 7, 8 |
Dementia | 36 | 5 | 2, 3, 5, 8, 7 |
In contrast directly expressed edges are not commonly shared across groups: only 5 edges shared by 3 groups: Exercise Decreases Chronic disease, Exercise Increases Physical health, Genetics Increases Dementia, Stress Decreases Mental health, and Stress Increases Mental health disorders, and 31 are shared by 2 — all others are unique
ARCHES_grp %>%
mutate(link=paste(cause,polarity,effect)) %>%
group_by(link) %>%
summarize(n=n(),n_groups=length(unique(Group)),groups=paste(model,collapse=", ")) -> by_direct_link
#edges reported by more than 1 group, sorted by number of groups
by_direct_link %>% filter(n_groups>1) %>% arrange(desc(n_groups)) %>% kbl(digits=2) %>% kable_styling()
link | n | n_groups | groups |
---|---|---|---|
Exercise -1 Chronic disease | 3 | 3 | 4, 7, 8 |
Exercise 1 Physical health | 3 | 3 | 2, 3, 7 |
Genetics 1 Dementia | 3 | 3 | 2, 7, 8 |
Stress -1 Mental health | 3 | 3 | 3, 4, 7 |
Stress 1 Mental health disorders | 3 | 3 | 5, 6, 8 |
Access to quality diet 1 Physical health | 2 | 2 | 7, 8 |
Access to quality education 1 Education level | 2 | 2 | 2, 3 |
Access to quality education 1 Level of Education | 2 | 2 | 4, 5 |
Access to quality jobs 1 Income | 2 | 2 | 1, 2 |
Brain activity -1 Dementia | 2 | 2 | 3, 5 |
Crime -1 Black businesses | 2 | 2 | 7, 8 |
Crime -1 Safe neighborhood | 2 | 2 | 6, 8 |
Crime 1 Food desert | 2 | 2 | 4, 8 |
Crime 1 Stress | 2 | 2 | 6, 7 |
Dementia 1 Family caregiving | 2 | 2 | 2, 8 |
Dementia 1 Mental health disorders | 2 | 2 | 5, 8 |
Education level 1 Access to quality jobs | 2 | 2 | 1, 2 |
Education level 1 Quality job | 2 | 2 | 3, 7 |
Family caregiving -1 Caregiver leisure time | 2 | 2 | 5, 7 |
Family caregiving -1 Income | 2 | 2 | 2, 4 |
Family caregiving 1 Stress | 2 | 2 | 5, 8 |
Health literacy 1 Exercise | 2 | 2 | 1, 5 |
Income -1 Crime | 2 | 2 | 6, 8 |
Income -1 Medicaid | 2 | 2 | 2, 8 |
Income -1 Poverty | 2 | 2 | 1, 4 |
Income 1 Health insurance | 2 | 2 | 1, 3 |
Income 1 Private transportation | 2 | 2 | 1, 2 |
Mental health disorders 1 Crime | 2 | 2 | 5, 8 |
Mental health disorders 1 Dementia | 2 | 2 | 5, 8 |
Misdiagnosis 1 Dementia | 2 | 2 | 2, 5 |
Private transportation 1 Access to quality healthcare | 2 | 2 | 1, 7 |
Public Transportation 1 Access to quality healthcare | 2 | 2 | 1, 4 |
Public transportation 1 Access to quality healthcare | 2 | 2 | 7, 8 |
Safe neighborhood 1 Exercise | 2 | 2 | 4, 8 |
Scholarships 1 African American healthcare providers | 2 | 2 | 2, 6 |
Stress 1 Dementia risk | 2 | 2 | 4, 6 |
If we expand our models to consider indirect edges (connections between any node in a model and all other nodes reachable from it), there are more similarities:
3 indirect edges are shared by 5 groups: crime connected to crime, exercise connected to physical health and systemic racism connected to access to quality jobs; 36 indirect edges are shared by 4 groups of which the most common causes are Access to quality education, exercise, crime, family caregiving, and income, and the most common effects are dementia, chronic disease, physical health, and stress.
Note this does not consider the summed polarity of the connections, just the fact of one node (the ‘effect’) being reach from another (the ‘cause’) through causal connections. This will also show looping structures (e.g. “Crime is downstream of Crime” indicates a loop)
#Expand all models
expanded_data<-data.frame("cause"=NA,"effect"=NA,"model"=NA) #set up blank dataframe
#expand each model in turn
for(i in unique(ARCHES_grp$model)){
one_model <- ARCHES_grp %>% filter(model==i)
one_model_expanded_all <- expand_edgelist_fast(one_model)
one_model_expanded <- one_model_expanded_all #note comment out this line and allow the next one
one_model_expanded$model=i
expanded_data<-rbind(expanded_data,data.frame(one_model_expanded))
finished_models<-length(unique(expanded_data$model))-1
}
expanded_data <- expanded_data[-1,]
expanded_data %>%
group_by(cause,effect) %>%
summarize(n=n(),n_groups=length(unique(model)),groups=paste(model,collapse=", ")) -> by_indirect_link
by_indirect_link %>% filter(n_groups>3) %>% arrange(desc(n_groups)) %>% kbl(digits=2) %>% kable_styling()
cause | effect | n | n_groups | groups |
---|---|---|---|---|
Crime | Crime | 5 | 5 | 2, 4, 5, 6, 8 |
Exercise | Physical health | 5 | 5 | 1, 2, 3, 7, 8 |
Systemic racism | Access to quality jobs | 5 | 5 | 1, 2, 5, 6, 8 |
Access to quality education | Access to quality jobs | 4 | 4 | 2, 5, 6, 8 |
Access to quality education | Chronic disease | 4 | 4 | 2, 3, 4, 8 |
Access to quality education | Crime | 4 | 4 | 2, 3, 5, 8 |
Access to quality education | Dementia | 4 | 4 | 2, 3, 5, 8 |
Access to quality education | Income | 4 | 4 | 2, 3, 4, 8 |
Access to quality education | Stress | 4 | 4 | 3, 5, 6, 8 |
Access to quality jobs | Access to quality jobs | 4 | 4 | 1, 2, 5, 8 |
Chronic disease | Chronic disease | 4 | 4 | 2, 4, 7, 8 |
Chronic disease | Dementia | 4 | 4 | 2, 3, 7, 8 |
Crime | Chronic disease | 4 | 4 | 2, 4, 7, 8 |
Crime | Dementia | 4 | 4 | 2, 5, 7, 8 |
Crime | Family caregiving | 4 | 4 | 2, 5, 7, 8 |
Crime | Stress | 4 | 4 | 5, 6, 7, 8 |
Dementia | Dementia | 4 | 4 | 2, 3, 5, 8 |
Education level | Physical health | 4 | 4 | 1, 2, 3, 7 |
Exercise | Access to quality healthcare | 4 | 4 | 1, 4, 7, 8 |
Exercise | Access to quality jobs | 4 | 4 | 1, 2, 5, 8 |
Exercise | Chronic disease | 4 | 4 | 2, 4, 7, 8 |
Exercise | Dementia | 4 | 4 | 2, 5, 7, 8 |
Exercise | Family caregiving | 4 | 4 | 2, 5, 7, 8 |
Family caregiving | Chronic disease | 4 | 4 | 2, 4, 7, 8 |
Family caregiving | Dementia | 4 | 4 | 2, 5, 7, 8 |
Family caregiving | Family caregiving | 4 | 4 | 2, 5, 7, 8 |
Family caregiving | Stress | 4 | 4 | 5, 6, 7, 8 |
Health literacy | Health literacy | 4 | 4 | 1, 2, 6, 7 |
Income | Access to quality healthcare | 4 | 4 | 1, 3, 4, 8 |
Income | Chronic disease | 4 | 4 | 2, 3, 4, 8 |
Income | Income | 4 | 4 | 1, 2, 4, 8 |
Income | Physical health | 4 | 4 | 1, 2, 3, 8 |
Private transportation | Physical health | 4 | 4 | 1, 2, 3, 7 |
Public transportation | Physical health | 4 | 4 | 2, 3, 7, 8 |
Social isolation | Dementia | 4 | 4 | 2, 3, 5, 8 |
Stress | Dementia | 4 | 4 | 3, 5, 7, 8 |
Stress | Stress | 4 | 4 | 5, 6, 7, 8 |
Systemic racism | Access to quality education | 4 | 4 | 2, 5, 6, 8 |
Systemic racism | Mental health disorders | 4 | 4 | 1, 5, 6, 8 |
Systemic racism | Trust on medical expertise | 4 | 4 | 2, 5, 6, 8 |
Finally, one way to focus down models to the systems they express is to only consider nodes that are involved in loops. To do this we construct a list of nodes in models that are reachable from themselves.
This lets us visualize simplified models from each group, but also means there is less similarity among groups.
loops_only <- expanded_data %>% filter(cause==effect)
grp1_loops<- grp1 %>%
filter(cause %in% loops_only[loops_only$model==1,'cause']) %>%
filter(effect %in% loops_only[loops_only$model==1,'effect'])
grp2_loops<- grp2 %>%
filter(cause %in% loops_only[loops_only$model==2,'cause']) %>%
filter(effect %in% loops_only[loops_only$model==2,'effect'])
grp3_loops<- grp3 %>%
filter(cause %in% loops_only[loops_only$model==3,'cause']) %>%
filter(effect %in% loops_only[loops_only$model==3,'effect'])
grp4_loops<- grp4 %>%
filter(cause %in% loops_only[loops_only$model==4,'cause']) %>%
filter(effect %in% loops_only[loops_only$model==4,'effect'])
grp5_loops<- grp5 %>%
filter(cause %in% loops_only[loops_only$model==5,'cause']) %>%
filter(effect %in% loops_only[loops_only$model==5,'effect'])
grp6_loops<- grp6 %>%
filter(cause %in% loops_only[loops_only$model==6,'cause']) %>%
filter(effect %in% loops_only[loops_only$model==6,'effect'])
grp7_loops<- grp7 %>%
filter(cause %in% loops_only[loops_only$model==7,'cause']) %>%
filter(effect %in% loops_only[loops_only$model==7,'effect'])
grp8_loops<- grp8 %>%
filter(cause %in% loops_only[loops_only$model==8,'cause']) %>%
filter(effect %in% loops_only[loops_only$model==8,'effect'])
#to compare
ARCHES_loops <- rbind(grp1_loops,grp2_loops,grp3_loops,grp4_loops,grp5_loops,grp6_loops,grp7_loops,grp8_loops)
#Group 1
graph_plot(grp1_loops)
#Group 2
graph_plot(grp2_loops)
#Group 3
graph_plot(grp3_loops)
#Group 4
graph_plot(grp4_loops)
#Group 5
graph_plot(grp5_loops)
#Group 6
graph_plot(grp6_loops)
#Group 7
graph_plot(grp7_loops)
#Group 8
graph_plot(grp8_loops)
The most widely shared nodes involved in these loops is Crime (5 groups). 7 more are shared by 4 groups. In contrast, edges are shared by fewer groups (only 14 edges by more than 1 group)./
loops_only <- expanded_data %>% filter(cause==effect)
ARCHES_loops %>% mutate(node=cause)->loops_causes
ARCHES_loops %>% mutate(node=effect)->loops_effects
rbind(loops_causes,loops_effects)->loops_nodes
loops_nodes %>%
group_by(node) %>%
summarize(n=n(),n_groups=length(unique(model)),groups=paste(model,collapse=", ")) -> by_loops_nodes
by_loops_nodes %>% filter(n_groups>3) %>% arrange(desc(n_groups)) %>% kbl(digits=2) %>% kable_styling()
node | n | n_groups | groups |
---|---|---|---|
Crime | 25 | 5 | 2, 4, 4, 4, 5, 5, 6, 6, 8, 8, 8, 8, 8, 2, 4, 5, 5, 5, 5, 6, 6, 6, 8, 8, 8 |
Access to quality jobs | 19 | 4 | 1, 1, 1, 2, 2, 5, 5, 8, 8, 1, 1, 1, 2, 2, 5, 5, 8, 8, 8 |
Chronic disease | 16 | 4 | 2, 4, 4, 7, 7, 8, 2, 4, 4, 7, 8, 8, 8, 8, 8, 8 |
Dementia | 22 | 4 | 2, 2, 2, 3, 5, 5, 8, 8, 8, 8, 8, 2, 2, 2, 3, 5, 5, 5, 5, 5, 8, 8 |
Family caregiving | 18 | 4 | 2, 5, 5, 7, 7, 8, 8, 8, 8, 8, 8, 2, 5, 7, 7, 7, 8, 8 |
Health literacy | 20 | 4 | 1, 1, 1, 1, 2, 2, 2, 6, 6, 7, 1, 1, 1, 1, 1, 2, 2, 2, 6, 7 |
Income | 27 | 4 | 1, 1, 1, 1, 1, 1, 1, 2, 2, 4, 4, 8, 8, 8, 8, 8, 8, 8, 1, 2, 2, 2, 4, 8, 8, 8, 8 |
Stress | 32 | 4 | 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8 |
ARCHES_loops %>%
group_by(cause,polarity,effect) %>%
summarize(n=n(),n_groups=length(unique(model)),groups=paste(model,collapse=", ")) -> by_loops
by_loops %>% filter(n_groups>1) %>% arrange(desc(n_groups)) %>% kbl(digits=2) %>% kable_styling()
cause | polarity | effect | n | n_groups | groups |
---|---|---|---|---|---|
Access to quality diet | 1 | Physical health | 2 | 2 | 7, 8 |
Access to quality jobs | 1 | Income | 2 | 2 | 1, 2 |
Crime | -1 | Safe neighborhood | 2 | 2 | 6, 8 |
Crime | 1 | Food desert | 2 | 2 | 4, 8 |
Dementia | 1 | Family caregiving | 2 | 2 | 2, 8 |
Dementia | 1 | Mental health disorders | 2 | 2 | 5, 8 |
Education level | 1 | Access to quality jobs | 2 | 2 | 1, 2 |
Family caregiving | 1 | Stress | 2 | 2 | 5, 8 |
Income | -1 | Poverty | 2 | 2 | 1, 4 |
Income | 1 | Private transportation | 2 | 2 | 1, 2 |
Mental health disorders | 1 | Crime | 2 | 2 | 5, 8 |
Mental health disorders | 1 | Dementia | 2 | 2 | 5, 8 |
Misdiagnosis | 1 | Dementia | 2 | 2 | 2, 5 |
Stress | 1 | Mental health disorders | 2 | 2 | 5, 8 |
We can make matrices of each of these data types (nodes, direct edges, indirect edges, direct edges involved in loops) and use them to quantitatively display similarity and differences among groups with ordinates, as well as run environmental fit analyses to see if the distance of groups in “model space” (defined, again, but nodes, edges, etc.) fits more losely to average age, gender, and deprivation values for each group, then expected based on comparing against 999 randomizations.
#this is making matrices
m_nodes<-
ARCHES_grp_nodes %>%
mutate(model=Group,chain=node) %>%
group_by(model,chain) %>% summarize(count=1) %>%
pivot_wider(id_cols=model,names_from=chain,values_from=count,values_fill=0) %>% as.data.frame()
rownames(m_nodes)<-m_nodes[,1]
m_nodes<-m_nodes[,-1]
m_direct<-
ARCHES_grp %>%
mutate(presence=1,chain=paste(cause, polarity,effect)) %>%
pivot_wider(id_cols=model,names_from=chain,values_from=presence,values_fill=0) %>% as.data.frame()
rownames(m_direct)<-m_direct[,1]
m_direct<-m_direct[,-1]
m_indirect<-
expanded_data %>%
mutate(presence=1,chain=paste(cause, effect)) %>%
pivot_wider(id_cols=model,names_from=chain,values_from=presence,values_fill=0) %>% as.data.frame()
rownames(m_indirect)<-m_indirect[,1]
m_indirect<-m_indirect[,-1]
m_loops<-
ARCHES_loops %>%
mutate(presence=1,chain=paste(cause,polarity,effect)) %>%
pivot_wider(id_cols=model,names_from=chain,values_from=presence,values_fill=0) %>% as.data.frame()
rownames(m_loops)<-m_loops[,1]
m_loops<-m_loops[,-1]
m_loops<-
ARCHES_loops %>%
mutate(presence=1,chain=paste(cause,polarity,effect)) %>%
pivot_wider(id_cols=model,names_from=chain,values_from=presence,values_fill=0) %>% as.data.frame()
rownames(m_loops)<-m_loops[,1]
m_loops<-m_loops[,-1]
##Okay let's do ordinations
require(vegan)
#characteristics
characteristics<-ARCHES_grp %>% group_by(model) %>% summarize(mean_age=unique(mean_age),mean_gender=unique(mean_gender),mean_deprivation=unique(mean_deprivation)) %>% as.data.frame()
rownames(characteristics)<-characteristics[,1]
characteristics[,1]<-as.factor(characteristics[,1])
#####Node
mds_nodes<-metaMDS(m_nodes,distance="euclidian",trace=0) #this makes the NMDS object
#ordiplot(mds_nodes, type='t',display="sites")
envfit(mds_nodes,characteristics) #nothing
##
## ***VECTORS
##
## NMDS1 NMDS2 r2 Pr(>r)
## mean_age 0.54511 0.83836 0.4410 0.253
## mean_gender 0.13953 -0.99022 0.4930 0.166
## mean_deprivation 0.07993 0.99680 0.3843 0.254
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## NMDS1 NMDS2
## model1 -0.0424 -0.0585
## model2 -5.4393 -0.9329
## model3 -3.7574 4.4054
## model4 4.1500 -2.8573
## model5 1.4326 1.7399
## model6 5.6443 2.6941
## model7 -0.8480 -0.1802
## model8 -1.1396 -4.8105
##
## Goodness of fit:
## r2 Pr(>r)
## model 1 1
## Permutation: free
## Number of permutations: 999
tabasco(m_nodes[,colSums(m_nodes)>3],use=hclust(vegdist(m_nodes[,colSums(m_nodes)>0])))
#(6,5) (2,8) (3,7)
#####Direct
mds_direct<-metaMDS(m_direct,distance="bray",trace=0) #this makes the NMDS object
#ordiplot(mds_direct)
#ordiplot(mds_direct, type='t',display="sites")
envfit(mds_direct,characteristics) #nothing
##
## ***VECTORS
##
## NMDS1 NMDS2 r2 Pr(>r)
## mean_age 0.86260 0.50588 0.0546 0.851
## mean_gender 0.91687 0.39919 0.1818 0.608
## mean_deprivation 0.68451 -0.72900 0.1739 0.637
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## NMDS1 NMDS2
## model1 -0.3329 0.1571
## model2 -0.1614 -0.1574
## model3 -0.4241 -0.1305
## model4 0.0333 0.3284
## model5 0.2072 -0.3705
## model6 0.5506 0.0932
## model7 -0.0931 0.1005
## model8 0.2205 -0.0207
##
## Goodness of fit:
## r2 Pr(>r)
## model 1 1
## Permutation: free
## Number of permutations: 999
tabasco(m_direct[,colSums(m_direct)>1],use=hclust(vegdist(m_direct[,colSums(m_direct)>0])))
#(7,8) (1,2)
#indirect
mds_indirect<-metaMDS(m_indirect,distance="bray",trace=0) #this makes the NMDS object
#ordiplot(mds_indirect)
envfit(mds_indirect,characteristics) #deprivation, nonsignificantly
##
## ***VECTORS
##
## NMDS1 NMDS2 r2 Pr(>r)
## mean_age 0.67728 -0.73573 0.1839 0.572
## mean_gender 0.49027 0.87157 0.0223 0.945
## mean_deprivation -0.18724 -0.98231 0.6774 0.054 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## NMDS1 NMDS2
## model1 -0.1083 0.3149
## model2 -0.1454 -0.1797
## model3 0.5384 -0.2803
## model4 0.2691 0.3741
## model5 -0.3973 0.0508
## model6 -0.3938 -0.2280
## model7 0.2686 -0.0433
## model8 -0.0313 -0.0085
##
## Goodness of fit:
## r2 Pr(>r)
## model 1 1
## Permutation: free
## Number of permutations: 999
#(6,5) (2,8) (3,7) (equivalent to nodes)
tabasco(m_indirect[,colSums(m_indirect)>3],use=hclust(vegdist(m_indirect[,colSums(m_indirect)>0])))
#loop
mds_loops<-metaMDS(m_loops,distance="euclidian",trace=0) #this makes the NMDS object
#ordiplot(mds_loops)
#ordiplot(mds_loops, type='t',display="sites")
envfit(mds_loops,characteristics) #age,non-significantly
##
## ***VECTORS
##
## NMDS1 NMDS2 r2 Pr(>r)
## mean_age 0.36583 -0.93068 0.0192 0.953
## mean_gender 0.43192 -0.90191 0.4948 0.168
## mean_deprivation 0.14992 0.98870 0.0325 0.932
## Permutation: free
## Number of permutations: 999
##
## ***FACTORS:
##
## Centroids:
## NMDS1 NMDS2
## model1 -2.7663 3.7183
## model2 -5.4224 -2.2948
## model3 -1.2771 -0.2000
## model4 -1.0257 -0.7172
## model5 1.2235 -2.8502
## model6 0.8903 2.4196
## model7 0.9588 0.1941
## model8 7.4189 -0.2698
##
## Goodness of fit:
## r2 Pr(>r)
## model 1 1
## Permutation: free
## Number of permutations: 999
#tabasco(m_loops[,colSums(m_loops)>1],use=hclust(vegdist(m_loops[,colSums(m_loops)>1])))