ARCHES

0) Goals:

Characterize structure stratified by age, gender, deprivation status

# Require the igraph package, used for plotting and "reachableness"
# Require tidyverse package for data manipulation
# Require kableExtra (just for table layout in this rMarkdown file)

library(igraph)
library(tidyverse)
library(kableExtra)

# custom code for plots
graph_plot<-function(grp){
  g <- graph(c(rbind(grp[,"cause"],grp[,"effect"])),directed=TRUE)
  
  plot(g,
       edge.color = grp$edge.color,
       vertex.size = 8,
       vertex.label.cex = 0.8,
       vertex.label.color = "black",
       edge.arrow.size = 0.4,
       layout=layout_with_fr(g)
  )
}

# Custom code to expand an edgelist to include "downstream" nodes that are reachable in the directed graph
expand_edgelist_fast<-function(a_model){
  require(igraph)
  g <- graph(c(rbind(a_model[,'cause'],a_model[,'effect'])),directed=TRUE)
  reachables<-data.frame("cause"=NA,"effect"=NA)
  
  for(each_name in V(g)$name){
    
    outs<-subcomponent(g, each_name, mode = "out")[-1]
    ins<-subcomponent(g, each_name, mode = "in")[-1]
    subs<-if(length(intersect(outs,ins))>0) c(names(outs),each_name) else names(outs) #Dec 16: changed from >1 to >0, I don't know why this was set this way to start with but is fixed now -- this corrects the one group (group 3 aka group 7 that seemingly had zero loops)
    a_reachable<-data.frame("cause"=rep(each_name,length(subs)),"effect"=subs)
    reachables<-rbind(reachables,a_reachable)
  }
  reachables[-1,]  
}

# custom code to plot one subcomponent #NOTE DONE YET
subcomponent_graph_plot<-function(grp,vertex_sub){
  g<-graph(c(rbind(grp[,"cause"],grp[,"effect"])),directed=TRUE)
  
  s<-subcomponent(g,vertex_sub,mode="out") #this is JUST calling these vertex names. instead, I want ot be reducting the graph to just these nodes.
  #TO FIX
  sg<-
  
  plot(g,
       edge.color = grp$edge.color,
       vertex.size = 8,
       vertex.label.cex = 0.8,
       vertex.label.color = "black",
       edge.arrow.size = 0.1,
       layout=layout_with_fr(g)
  )
}

1) Group structure

Load, characterize, and visualize participation and demographic data.

-Note: group numbering used here is sequential, so may not match the numbering used in the folder structure – be sure to check against model name).

-Note: figures show distribution “violins” and also points for each person. Points are shown with a small amount of x and y axis noise (jitter) just to be visible / not overplotted.

#load demographic data
participants <- read.csv("~/Downloads/ARCHESIPadDataEntry_DATA_2023-08-25_1735.csv", header=TRUE)
demo <- participants %>%
  filter(!is.na(dob_year)&!is.na(gender)) %>%
  group_by(record_id) %>%
  summarize(
    age=unique(dob_year),
    gender=unique(gender))

#do a couple of corrections based on comparison with the screening data
#correct two participant ages that were missing the leading '19' in the datafile
demo[demo$age<1900,'age']<-demo[demo$age<1900,'age']+1900
#add two missing participants with missing data in the the datafile 
demo <- rbind(demo,c(5198,1973,1))
demo <- rbind(demo,c(5201,1957,2)) 

#load deprivation status and merge to demographic data
deprivation <- read.csv("~/Downloads/deprivation status.csv", header=TRUE)
demo <- merge(demo,deprivation[,c('record_id','Deprivation.status')],all=T) #158 participants

#load participation data by group and merge to demographic data
group_participation <- read.csv("~/Downloads/library of models and participants.csv", header=TRUE)
demo <- merge(demo,group_participation,by.x="record_id",by.y="Members") # reduces to 59 participants

#display table of aggregate characteristics
demo %>% group_by(Group) %>% summarize(n_people=n(),birth_year=mean(age), gender=mean(gender), deprivation=mean(Deprivation.status),model=unique(mdl.file.name)) %>% kbl(digits=2) %>% kable_styling()

Group	n_people	birth_year	gender	deprivation	model
1	8	1963.75	1.75	0.12	./CLD-17-03-23-Standardized-v2_Ram_Updated.mdl
2	11	1962.27	1.91	0.27	./CLD-18-3-23_LoopsColored_ActionIdeas-JFT_RAM_Updated.mdl
3	6	1953.17	1.83	0.33	./CLD.3.19.2023_MM-AW_RAM_Updated.mdl
4	6	1954.17	2.00	0.00	./CLD-19-03-23-G2_JF_RAM_Updated.mdl
5	8	1955.88	1.88	0.25	./ARCHES GMB Vensim Number 01 03.24.2023_Written by WZ SL_JF_RAM_Updated.mdl
6	7	1953.43	1.86	0.57	./ARCHES GMB Vensim Number 02 03.24.2023_Written by CC_Meena&Alexis_Ram_Updated.mdl
7	7	1953.14	1.86	0.43	./ARCHES GMB Vensim Number 01 03.25.2023_Written by WZ SL RAM_Updated.mdl
8	6	1962.17	2.00	0.17	./GMB-JFT-NR-03-25-23-v2_Ram_Updated.mdl

#working tabel of aggregate characteristics
group_charatacteristics <- demo %>%
  group_by(Group) %>%
  summarize(
    mean_age=mean(2023-age),
    mean_gender=mean(gender),
    mean_deprivation=mean(Deprivation.status),
    mdl.file.name=unique(mdl.file.name))


#summarize and visualize demographic data
demo %>% ggplot(aes(x=as.factor(Group),y=age,group=as.factor(Group)))+geom_violin()+geom_jitter(width=0.2,height=0.01,pch=1)+scale_x_discrete("Group")+scale_y_continuous("birth year")#some range of differentiation

demo %>% ggplot(aes(x=as.factor(Group),y=gender,group=as.factor(Group)))+geom_violin()+geom_jitter(width=0.2,height=0.01,pch=1)+scale_x_discrete("Group")

demo %>% ggplot(aes(x=as.factor(Group),y=Deprivation.status,group=as.factor(Group)))+geom_violin()+geom_jitter(width=0.2,height=0.01,pch=1)+scale_x_discrete("Group")+scale_y_continuous("deprivation")

2) Models

Load, characterize and visualize models from edgelists.

-In example plots, light blue indicates “increase” and light pink “decrease”. Dark colors are Action Ideas.

-These are a bit “messy” at the moment but for now it’s just a way to check that the data looks right (i.e. negatives and postives make sense, all the edges are there, AIs are distinguished, etc.)

#Loading and testing edgelists
ARCHES <- read.csv("~/Downloads/edgelist 1.2/All vensim Edgelist1.2.csv", header=TRUE)

ARCHES <- ARCHES %>%
  mutate(
    cause=trimws(X1st.Variable),
    effect=trimws(Second.Variable),
    cause_isAI=str_starts(cause,"AI "),
    effect_isAI=str_starts(effect,"AI "),
    polarity=case_when(
      Polarity=="Increases" ~ 1,
      Polarity=="Decreases" ~ -1),
    edge.color=case_when(
      (cause_isAI==TRUE|effect_isAI==TRUE)&polarity==1 ~ "dark blue",
      (cause_isAI==FALSE&effect_isAI==FALSE)&polarity==1 ~ "light blue",
      (cause_isAI==TRUE|effect_isAI==TRUE)&polarity==-1 ~ "dark red",
      (cause_isAI==FALSE&effect_isAI==FALSE)&polarity==-1 ~ "pink"))

# Define groups
ARCHES_grp <- merge(ARCHES,group_charatacteristics,by="mdl.file.name")

ARCHES_grp$model=ARCHES_grp$Group
ARCHES_grp<-ARCHES_grp %>% arrange(model)

grp1<-ARCHES_grp %>% filter(Group==1)
grp2<-ARCHES_grp %>% filter(Group==2)
grp3<-ARCHES_grp %>% filter(Group==3)
grp4<-ARCHES_grp %>% filter(Group==4)
grp5<-ARCHES_grp %>% filter(Group==5)
grp6<-ARCHES_grp %>% filter(Group==6)
grp7<-ARCHES_grp %>% filter(Group==7)
grp8<-ARCHES_grp %>% filter(Group==8)

# summary stats
ARCHES_grp %>% group_by(Group) %>%
  summarize (number_edges=n(),
             number_unique_nodes=length(unique(c(cause,effect)))) %>% kbl(digits=2) %>% kable_styling()

Group	number_edges	number_unique_nodes
1	88	48
2	121	86
3	100	82
4	115	75
5	129	72
6	132	84
7	108	62
8	154	82

#Group 1
graph_plot(grp1)

#Group 2
graph_plot(grp2)

#Group 3
graph_plot(grp3)

#Group 4
graph_plot(grp4)

#Group 5
graph_plot(grp5)

#Group 6
graph_plot(grp6)

#Group 7
graph_plot(grp7)

#Group 8
graph_plot(grp8)

3) Commonalities and differences among groups

3.1 Nodes

What are common nodes and common direct links across groups?

Some nodes are widely shared, with Crime, Exercise, Access to quality education, Access to quality healthcare, Family caregiving, Income, and Stress being shared by 6 or 7 groups, and Stress connected to many more edges than other nodes, followed by Income, Access to quality healthcare, Crime, and Dementia

ARCHES_grp_cause_as_node<-ARCHES_grp %>% mutate(node=cause)
ARCHES_grp_effect_as_node<-ARCHES_grp %>% mutate(node=effect)
ARCHES_grp_nodes<-rbind(ARCHES_grp_cause_as_node,ARCHES_grp_effect_as_node)

ARCHES_grp_nodes %>%
  group_by(node) %>%
  summarize(n=n(),n_groups=length(unique(Group)),groups=paste(unique(model),collapse=", ")) -> by_node

#nodes shared by 6 or more groups, sorted by number of groups
by_node %>% filter(n_groups>5) %>% arrange(desc(n_groups)) %>% kbl(digits=2) %>% kable_styling()

node	n	n_groups	groups
Crime	37	7	2, 3, 4, 5, 6, 7, 8
Exercise	27	7	1, 2, 3, 4, 5, 7, 8
Access to quality education	21	6	2, 3, 4, 5, 6, 8
Access to quality healthcare	41	6	1, 3, 4, 6, 8, 7
Family caregiving	31	6	2, 4, 5, 6, 7, 8
Income	47	6	1, 2, 3, 4, 6, 8
Stress	59	6	3, 4, 5, 6, 7, 8

#nodes reported 35 or more times, sorted by number of edges with node as cause or effect
by_node %>% filter(n>35) %>% arrange(desc(n)) %>% kbl(digits=2) %>% kable_styling()

node	n	n_groups	groups
Stress	59	6	3, 4, 5, 6, 7, 8
Income	47	6	1, 2, 3, 4, 6, 8
Access to quality healthcare	41	6	1, 3, 4, 6, 8, 7
Crime	37	7	2, 3, 4, 5, 6, 7, 8
Dementia	36	5	2, 3, 5, 8, 7

3.2 Direct edges

In contrast directly expressed edges are not commonly shared across groups: only 5 edges shared by 3 groups: Exercise Decreases Chronic disease, Exercise Increases Physical health, Genetics Increases Dementia, Stress Decreases Mental health, and Stress Increases Mental health disorders, and 31 are shared by 2 — all others are unique

ARCHES_grp %>%
  mutate(link=paste(cause,polarity,effect)) %>%
  group_by(link) %>%
  summarize(n=n(),n_groups=length(unique(Group)),groups=paste(model,collapse=", ")) -> by_direct_link

#edges reported by more than 1 group, sorted by number of groups
by_direct_link %>% filter(n_groups>1) %>% arrange(desc(n_groups)) %>% kbl(digits=2) %>% kable_styling()

link	n	n_groups	groups
Exercise -1 Chronic disease	3	3	4, 7, 8
Exercise 1 Physical health	3	3	2, 3, 7
Genetics 1 Dementia	3	3	2, 7, 8
Stress -1 Mental health	3	3	3, 4, 7
Stress 1 Mental health disorders	3	3	5, 6, 8
Access to quality diet 1 Physical health	2	2	7, 8
Access to quality education 1 Education level	2	2	2, 3
Access to quality education 1 Level of Education	2	2	4, 5
Access to quality jobs 1 Income	2	2	1, 2
Brain activity -1 Dementia	2	2	3, 5
Crime -1 Black businesses	2	2	7, 8
Crime -1 Safe neighborhood	2	2	6, 8
Crime 1 Food desert	2	2	4, 8
Crime 1 Stress	2	2	6, 7
Dementia 1 Family caregiving	2	2	2, 8
Dementia 1 Mental health disorders	2	2	5, 8
Education level 1 Access to quality jobs	2	2	1, 2
Education level 1 Quality job	2	2	3, 7
Family caregiving -1 Caregiver leisure time	2	2	5, 7
Family caregiving -1 Income	2	2	2, 4
Family caregiving 1 Stress	2	2	5, 8
Health literacy 1 Exercise	2	2	1, 5
Income -1 Crime	2	2	6, 8
Income -1 Medicaid	2	2	2, 8
Income -1 Poverty	2	2	1, 4
Income 1 Health insurance	2	2	1, 3
Income 1 Private transportation	2	2	1, 2
Mental health disorders 1 Crime	2	2	5, 8
Mental health disorders 1 Dementia	2	2	5, 8
Misdiagnosis 1 Dementia	2	2	2, 5
Private transportation 1 Access to quality healthcare	2	2	1, 7
Public Transportation 1 Access to quality healthcare	2	2	1, 4
Public transportation 1 Access to quality healthcare	2	2	7, 8
Safe neighborhood 1 Exercise	2	2	4, 8
Scholarships 1 African American healthcare providers	2	2	2, 6
Stress 1 Dementia risk	2	2	4, 6

3.3 Indirect edges

If we expand our models to consider indirect edges (connections between any node in a model and all other nodes reachable from it), there are more similarities:

3 indirect edges are shared by 5 groups: crime connected to crime, exercise connected to physical health and systemic racism connected to access to quality jobs; 36 indirect edges are shared by 4 groups of which the most common causes are Access to quality education, exercise, crime, family caregiving, and income, and the most common effects are dementia, chronic disease, physical health, and stress.

Note this does not consider the summed polarity of the connections, just the fact of one node (the ‘effect’) being reach from another (the ‘cause’) through causal connections. This will also show looping structures (e.g. “Crime is downstream of Crime” indicates a loop)

#Expand all models
expanded_data<-data.frame("cause"=NA,"effect"=NA,"model"=NA) #set up blank dataframe
#expand each model in turn
for(i in unique(ARCHES_grp$model)){
  one_model <- ARCHES_grp %>% filter(model==i)
  one_model_expanded_all <- expand_edgelist_fast(one_model)
  one_model_expanded <- one_model_expanded_all #note comment out this line and allow the next one
  one_model_expanded$model=i
  expanded_data<-rbind(expanded_data,data.frame(one_model_expanded))
  finished_models<-length(unique(expanded_data$model))-1
}
expanded_data <- expanded_data[-1,]

expanded_data %>%
  group_by(cause,effect) %>%
  summarize(n=n(),n_groups=length(unique(model)),groups=paste(model,collapse=", ")) -> by_indirect_link

by_indirect_link %>% filter(n_groups>3) %>% arrange(desc(n_groups)) %>% kbl(digits=2) %>% kable_styling()

cause	effect	n	n_groups	groups
Crime	Crime	5	5	2, 4, 5, 6, 8
Exercise	Physical health	5	5	1, 2, 3, 7, 8
Systemic racism	Access to quality jobs	5	5	1, 2, 5, 6, 8
Access to quality education	Access to quality jobs	4	4	2, 5, 6, 8
Access to quality education	Chronic disease	4	4	2, 3, 4, 8
Access to quality education	Crime	4	4	2, 3, 5, 8
Access to quality education	Dementia	4	4	2, 3, 5, 8
Access to quality education	Income	4	4	2, 3, 4, 8
Access to quality education	Stress	4	4	3, 5, 6, 8
Access to quality jobs	Access to quality jobs	4	4	1, 2, 5, 8
Chronic disease	Chronic disease	4	4	2, 4, 7, 8
Chronic disease	Dementia	4	4	2, 3, 7, 8
Crime	Chronic disease	4	4	2, 4, 7, 8
Crime	Dementia	4	4	2, 5, 7, 8
Crime	Family caregiving	4	4	2, 5, 7, 8
Crime	Stress	4	4	5, 6, 7, 8
Dementia	Dementia	4	4	2, 3, 5, 8
Education level	Physical health	4	4	1, 2, 3, 7
Exercise	Access to quality healthcare	4	4	1, 4, 7, 8
Exercise	Access to quality jobs	4	4	1, 2, 5, 8
Exercise	Chronic disease	4	4	2, 4, 7, 8
Exercise	Dementia	4	4	2, 5, 7, 8
Exercise	Family caregiving	4	4	2, 5, 7, 8
Family caregiving	Chronic disease	4	4	2, 4, 7, 8
Family caregiving	Dementia	4	4	2, 5, 7, 8
Family caregiving	Family caregiving	4	4	2, 5, 7, 8
Family caregiving	Stress	4	4	5, 6, 7, 8
Health literacy	Health literacy	4	4	1, 2, 6, 7
Income	Access to quality healthcare	4	4	1, 3, 4, 8
Income	Chronic disease	4	4	2, 3, 4, 8
Income	Income	4	4	1, 2, 4, 8
Income	Physical health	4	4	1, 2, 3, 8
Private transportation	Physical health	4	4	1, 2, 3, 7
Public transportation	Physical health	4	4	2, 3, 7, 8
Social isolation	Dementia	4	4	2, 3, 5, 8
Stress	Dementia	4	4	3, 5, 7, 8
Stress	Stress	4	4	5, 6, 7, 8
Systemic racism	Access to quality education	4	4	2, 5, 6, 8
Systemic racism	Mental health disorders	4	4	1, 5, 6, 8
Systemic racism	Trust on medical expertise	4	4	2, 5, 6, 8

3.4 Loops

Finally, one way to focus down models to the systems they express is to only consider nodes that are involved in loops. To do this we construct a list of nodes in models that are reachable from themselves.

This lets us visualize simplified models from each group, but also means there is less similarity among groups.

loops_only <- expanded_data %>% filter(cause==effect)

grp1_loops<- grp1 %>%
  filter(cause %in% loops_only[loops_only$model==1,'cause']) %>%
  filter(effect %in% loops_only[loops_only$model==1,'effect'])

grp2_loops<- grp2 %>%
  filter(cause %in% loops_only[loops_only$model==2,'cause']) %>%
  filter(effect %in% loops_only[loops_only$model==2,'effect'])

grp3_loops<- grp3 %>%
  filter(cause %in% loops_only[loops_only$model==3,'cause']) %>%
  filter(effect %in% loops_only[loops_only$model==3,'effect'])

grp4_loops<- grp4 %>%
  filter(cause %in% loops_only[loops_only$model==4,'cause']) %>%
  filter(effect %in% loops_only[loops_only$model==4,'effect'])

grp5_loops<- grp5 %>%
  filter(cause %in% loops_only[loops_only$model==5,'cause']) %>%
  filter(effect %in% loops_only[loops_only$model==5,'effect'])

grp6_loops<- grp6 %>%
  filter(cause %in% loops_only[loops_only$model==6,'cause']) %>%
  filter(effect %in% loops_only[loops_only$model==6,'effect'])

grp7_loops<- grp7 %>%
  filter(cause %in% loops_only[loops_only$model==7,'cause']) %>%
  filter(effect %in% loops_only[loops_only$model==7,'effect'])

grp8_loops<- grp8 %>%
  filter(cause %in% loops_only[loops_only$model==8,'cause']) %>%
  filter(effect %in% loops_only[loops_only$model==8,'effect'])

#to compare
ARCHES_loops <- rbind(grp1_loops,grp2_loops,grp3_loops,grp4_loops,grp5_loops,grp6_loops,grp7_loops,grp8_loops)

#Group 1
graph_plot(grp1_loops)

#Group 2
graph_plot(grp2_loops)

#Group 3
graph_plot(grp3_loops)

#Group 4
graph_plot(grp4_loops)

#Group 5
graph_plot(grp5_loops)

#Group 6
graph_plot(grp6_loops)

#Group 7
graph_plot(grp7_loops)

#Group 8
graph_plot(grp8_loops)

The most widely shared nodes involved in these loops is Crime (5 groups). 7 more are shared by 4 groups. In contrast, edges are shared by fewer groups (only 14 edges by more than 1 group)./

loops_only <- expanded_data %>% filter(cause==effect)


ARCHES_loops %>% mutate(node=cause)->loops_causes
ARCHES_loops %>% mutate(node=effect)->loops_effects

rbind(loops_causes,loops_effects)->loops_nodes

loops_nodes %>%
  group_by(node) %>%
  summarize(n=n(),n_groups=length(unique(model)),groups=paste(model,collapse=", ")) -> by_loops_nodes

by_loops_nodes %>% filter(n_groups>3) %>% arrange(desc(n_groups)) %>% kbl(digits=2) %>% kable_styling()

node	n	n_groups	groups
Crime	25	5	2, 4, 4, 4, 5, 5, 6, 6, 8, 8, 8, 8, 8, 2, 4, 5, 5, 5, 5, 6, 6, 6, 8, 8, 8
Access to quality jobs	19	4	1, 1, 1, 2, 2, 5, 5, 8, 8, 1, 1, 1, 2, 2, 5, 5, 8, 8, 8
Chronic disease	16	4	2, 4, 4, 7, 7, 8, 2, 4, 4, 7, 8, 8, 8, 8, 8, 8
Dementia	22	4	2, 2, 2, 3, 5, 5, 8, 8, 8, 8, 8, 2, 2, 2, 3, 5, 5, 5, 5, 5, 8, 8
Family caregiving	18	4	2, 5, 5, 7, 7, 8, 8, 8, 8, 8, 8, 2, 5, 7, 7, 7, 8, 8
Health literacy	20	4	1, 1, 1, 1, 2, 2, 2, 6, 6, 7, 1, 1, 1, 1, 1, 2, 2, 2, 6, 7
Income	27	4	1, 1, 1, 1, 1, 1, 1, 2, 2, 4, 4, 8, 8, 8, 8, 8, 8, 8, 1, 2, 2, 2, 4, 8, 8, 8, 8
Stress	32	4	5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8

ARCHES_loops %>%
  group_by(cause,polarity,effect) %>%
  summarize(n=n(),n_groups=length(unique(model)),groups=paste(model,collapse=", ")) -> by_loops

by_loops %>% filter(n_groups>1) %>% arrange(desc(n_groups)) %>% kbl(digits=2) %>% kable_styling()

cause	polarity	effect	n	n_groups	groups
Access to quality diet	1	Physical health	2	2	7, 8
Access to quality jobs	1	Income	2	2	1, 2
Crime	-1	Safe neighborhood	2	2	6, 8
Crime	1	Food desert	2	2	4, 8
Dementia	1	Family caregiving	2	2	2, 8
Dementia	1	Mental health disorders	2	2	5, 8
Education level	1	Access to quality jobs	2	2	1, 2
Family caregiving	1	Stress	2	2	5, 8
Income	-1	Poverty	2	2	1, 4
Income	1	Private transportation	2	2	1, 2
Mental health disorders	1	Crime	2	2	5, 8
Mental health disorders	1	Dementia	2	2	5, 8
Misdiagnosis	1	Dementia	2	2	2, 5
Stress	1	Mental health disorders	2	2	5, 8

4) Multivariate comparisons

We can make matrices of each of these data types (nodes, direct edges, indirect edges, direct edges involved in loops) and use them to quantitatively display similarity and differences among groups with ordinates, as well as run environmental fit analyses to see if the distance of groups in “model space” (defined, again, but nodes, edges, etc.) fits more losely to average age, gender, and deprivation values for each group, then expected based on comparing against 999 randomizations.

#this is making matrices

m_nodes<-
  ARCHES_grp_nodes %>%
  mutate(model=Group,chain=node) %>%
  group_by(model,chain) %>% summarize(count=1) %>%
  pivot_wider(id_cols=model,names_from=chain,values_from=count,values_fill=0)  %>% as.data.frame()
rownames(m_nodes)<-m_nodes[,1]
m_nodes<-m_nodes[,-1]

m_direct<-
  ARCHES_grp %>%
  mutate(presence=1,chain=paste(cause, polarity,effect)) %>%
  pivot_wider(id_cols=model,names_from=chain,values_from=presence,values_fill=0)  %>% as.data.frame()
rownames(m_direct)<-m_direct[,1]
m_direct<-m_direct[,-1]

m_indirect<-
  expanded_data %>%
  mutate(presence=1,chain=paste(cause, effect)) %>%
  pivot_wider(id_cols=model,names_from=chain,values_from=presence,values_fill=0)  %>% as.data.frame()
rownames(m_indirect)<-m_indirect[,1]
m_indirect<-m_indirect[,-1]

m_loops<-
  ARCHES_loops %>%
  mutate(presence=1,chain=paste(cause,polarity,effect)) %>%
  pivot_wider(id_cols=model,names_from=chain,values_from=presence,values_fill=0)  %>% as.data.frame()
rownames(m_loops)<-m_loops[,1]
m_loops<-m_loops[,-1]

m_loops<-
  ARCHES_loops %>%
  mutate(presence=1,chain=paste(cause,polarity,effect)) %>%
  pivot_wider(id_cols=model,names_from=chain,values_from=presence,values_fill=0)  %>% as.data.frame()
rownames(m_loops)<-m_loops[,1]
m_loops<-m_loops[,-1]

##Okay let's do ordinations
require(vegan)

#characteristics
characteristics<-ARCHES_grp %>% group_by(model) %>% summarize(mean_age=unique(mean_age),mean_gender=unique(mean_gender),mean_deprivation=unique(mean_deprivation)) %>% as.data.frame()
rownames(characteristics)<-characteristics[,1]
characteristics[,1]<-as.factor(characteristics[,1])

#####Node
mds_nodes<-metaMDS(m_nodes,distance="euclidian",trace=0) #this makes the NMDS object
#ordiplot(mds_nodes, type='t',display="sites")
envfit(mds_nodes,characteristics) #nothing

## 
## ***VECTORS
## 
##                     NMDS1    NMDS2     r2 Pr(>r)
## mean_age          0.54511  0.83836 0.4410  0.253
## mean_gender       0.13953 -0.99022 0.4930  0.166
## mean_deprivation  0.07993  0.99680 0.3843  0.254
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##          NMDS1   NMDS2
## model1 -0.0424 -0.0585
## model2 -5.4393 -0.9329
## model3 -3.7574  4.4054
## model4  4.1500 -2.8573
## model5  1.4326  1.7399
## model6  5.6443  2.6941
## model7 -0.8480 -0.1802
## model8 -1.1396 -4.8105
## 
## Goodness of fit:
##       r2 Pr(>r)
## model  1      1
## Permutation: free
## Number of permutations: 999

tabasco(m_nodes[,colSums(m_nodes)>3],use=hclust(vegdist(m_nodes[,colSums(m_nodes)>0])))

#(6,5) (2,8) (3,7)

#####Direct
mds_direct<-metaMDS(m_direct,distance="bray",trace=0) #this makes the NMDS object
#ordiplot(mds_direct)
#ordiplot(mds_direct, type='t',display="sites")
envfit(mds_direct,characteristics) #nothing

## 
## ***VECTORS
## 
##                     NMDS1    NMDS2     r2 Pr(>r)
## mean_age          0.86260  0.50588 0.0546  0.851
## mean_gender       0.91687  0.39919 0.1818  0.608
## mean_deprivation  0.68451 -0.72900 0.1739  0.637
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##          NMDS1   NMDS2
## model1 -0.3329  0.1571
## model2 -0.1614 -0.1574
## model3 -0.4241 -0.1305
## model4  0.0333  0.3284
## model5  0.2072 -0.3705
## model6  0.5506  0.0932
## model7 -0.0931  0.1005
## model8  0.2205 -0.0207
## 
## Goodness of fit:
##       r2 Pr(>r)
## model  1      1
## Permutation: free
## Number of permutations: 999

tabasco(m_direct[,colSums(m_direct)>1],use=hclust(vegdist(m_direct[,colSums(m_direct)>0])))

#(7,8) (1,2)

#indirect
mds_indirect<-metaMDS(m_indirect,distance="bray",trace=0) #this makes the NMDS object
#ordiplot(mds_indirect) 
envfit(mds_indirect,characteristics) #deprivation, nonsignificantly

## 
## ***VECTORS
## 
##                     NMDS1    NMDS2     r2 Pr(>r)  
## mean_age          0.67728 -0.73573 0.1839  0.572  
## mean_gender       0.49027  0.87157 0.0223  0.945  
## mean_deprivation -0.18724 -0.98231 0.6774  0.054 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##          NMDS1   NMDS2
## model1 -0.1083  0.3149
## model2 -0.1454 -0.1797
## model3  0.5384 -0.2803
## model4  0.2691  0.3741
## model5 -0.3973  0.0508
## model6 -0.3938 -0.2280
## model7  0.2686 -0.0433
## model8 -0.0313 -0.0085
## 
## Goodness of fit:
##       r2 Pr(>r)
## model  1      1
## Permutation: free
## Number of permutations: 999

#(6,5) (2,8) (3,7) (equivalent to nodes)

tabasco(m_indirect[,colSums(m_indirect)>3],use=hclust(vegdist(m_indirect[,colSums(m_indirect)>0])))

#loop
mds_loops<-metaMDS(m_loops,distance="euclidian",trace=0) #this makes the NMDS object
#ordiplot(mds_loops) 
#ordiplot(mds_loops, type='t',display="sites")
envfit(mds_loops,characteristics) #age,non-significantly

## 
## ***VECTORS
## 
##                     NMDS1    NMDS2     r2 Pr(>r)
## mean_age          0.36583 -0.93068 0.0192  0.953
## mean_gender       0.43192 -0.90191 0.4948  0.168
## mean_deprivation  0.14992  0.98870 0.0325  0.932
## Permutation: free
## Number of permutations: 999
## 
## ***FACTORS:
## 
## Centroids:
##          NMDS1   NMDS2
## model1 -2.7663  3.7183
## model2 -5.4224 -2.2948
## model3 -1.2771 -0.2000
## model4 -1.0257 -0.7172
## model5  1.2235 -2.8502
## model6  0.8903  2.4196
## model7  0.9588  0.1941
## model8  7.4189 -0.2698
## 
## Goodness of fit:
##       r2 Pr(>r)
## model  1      1
## Permutation: free
## Number of permutations: 999

#tabasco(m_loops[,colSums(m_loops)>1],use=hclust(vegdist(m_loops[,colSums(m_loops)>1])))

ARCHES

Robbie Hart

2023-12-19

ARCHES

0) Goals:

1) Group structure

2) Models

3) Commonalities and differences among groups

3.1 Nodes

3.2 Direct edges

3.3 Indirect edges

3.4 Loops

4) Multivariate comparisons