load("Prima_elaborazione_2.RData")
library(igraph)
library(HistDAWass)
library(brainGraph)
library(tidyverse)
library(plotly)
library(effects)
library(heatmaply)
source("Preprocessing.R")
source("useful_functions.R")
knitr::opts_chunk$set(cache=TRUE)

PCA of 4500 nets

Plane 1-2

plo12<-ggplotly(plot_indiv(res_ALL,x=1,y=2)+geom_point(aes(color=as.factor(DESC$types))))
plo12

Plane 3-4

plo34<-ggplotly(plot_indiv(res_ALL,x=3,y=4)+geom_point(aes(color=as.factor(DESC$types))))
plo34              

a 3d plot

DF3d<-as.data.frame(res_ALL$ind$coord)
DF3d<-DF3d %>% mutate(ID=row.names(.),types=as.factor(DESC$types))
plo3d<- plot_ly(DF3d, x = ~Dim.1, y = ~Dim.2, z = ~Dim.3,color = ~types,
                text=~ID,
                type = 'scatter3d', mode = 'markers', 
                marker=list(
                  size=4,
                  line=list(
                    color='MediumPurple',
                    width=0.5
                  )
                ))
plo3d

Cluster analysis

tree=hclust(dist(res_ALL$ind$coord),method = "ward.D")
clu<-cutree(tree,45)

Compute the Gini index for each class vs clusters

DF<-data.frame(types=DESC$types,size=DESC$size,dens=DESC$dens)

DF<-DF %>% mutate(exper=paste(types,size,dens,sep="_"))
print(paste0("Adjusted Rand index -> ",mclust::adjustedRandIndex(clu,DF$exper)))
## [1] "Adjusted Rand index -> 0.56818660794762"
confu<-as.data.frame(table(DF$exper,clu))
confu %>% pivot_wider(names_from = clu,values_from = Freq)
confu<-confu %>% pivot_wider(names_from = clu,values_from = Freq)
pur_cla<-numeric();for(i in 2:46){pur_cla<-c(pur_cla,sum((confu[,i]/sum(confu[,i]))^2))}
pur_cla_row<-numeric();for(i in 1:45){
  pur_cla_row<-c(pur_cla_row,sum((confu[i,2:46]/sum(confu[i,2:46]))^2))
}

Effects

tmp<-data.frame(CLU=confu$Var1,pp=pur_cla_row)
tmp<-tmp %>% right_join(unique(DF), by=c("CLU"="exper"))
tmp$types<-factor(tmp$types, levels=c("RG","2_I","3_I","PA","SW"))
tmp$size<-factor(tmp$size, levels=c(960,480,240))
tmp$dens<-factor(tmp$dens, levels=c(0.1,0.05,0.01))

model<-lm(pp~types+size+dens,tmp)
print(summary(model))
## 
## Call:
## lm(formula = pp ~ types + size + dens, data = tmp)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.268511 -0.102116  0.008684  0.079533  0.310311 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.837600   0.060728  13.793 6.13e-16 ***
## types2_I    -0.179711   0.064013  -2.807 0.008013 ** 
## types3_I    -0.226244   0.064013  -3.534 0.001144 ** 
## typesPA     -0.249933   0.064013  -3.904 0.000398 ***
## typesSW      0.007978   0.064013   0.125 0.901512    
## size480     -0.006040   0.049584  -0.122 0.903724    
## size240      0.082867   0.049584   1.671 0.103348    
## dens0.05     0.081840   0.049584   1.651 0.107534    
## dens0.01    -0.259533   0.049584  -5.234 7.33e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1358 on 36 degrees of freedom
## Multiple R-squared:  0.7049, Adjusted R-squared:  0.6393 
## F-statistic: 10.75 on 8 and 36 DF,  p-value: 1.436e-07

Plots of effects

plot(allEffects(model))

Heatmap of clusters

confum<-as.matrix(confu[,2:46])

row.names(confum)<-confu$Var1
plo_hm<-heatmaply(confum,Colv=NA)
plo_hm