The user must set their working directory according to the instructions in the appropriate section
The files must be in Excel or CSV format, i.e. with ".xlsx", ".xls" or ".csv" file extensions
In order to extract the raw R code (.R) from the R Markdown file (.Rmd), one must run:
install.packages("knitr")
library(knitr)
purl('Network analysis and Monte Carlo simulation.Rmd')
In order to set the working directory containing the data, the user must add the following line (adapted to their own path) to the extracted code:
setwd("C:/path/to/your/working/directory")
Installing the “pacman” package only if it is not installed yet
if (!require("pacman")) install.packages("pacman")
Installing the remaining packages only if they aren’t installed yet, with pacman’s “p_load” function
pacman::p_load(xfun,readxl,dplyr,tidyr,expm,truncnorm,foreach,parallel,igraph,rgl,rmarkdown,matrixcalc,installr,knitr,base64enc)
Loading all needed packages
packages<-c("xfun","Rlabkey","readxl","dplyr","tidyr","expm","truncnorm","Matrix","MASS","foreach","igraph","rgl","highr","rmarkdown","matrixcalc","installr","knitr","base64enc")
lapply(packages, require, character.only = TRUE)
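Because the p_load() call above and the "packages" vector do not list exactly the same packages, a quick check that everything actually loaded can be useful. This is a minimal sketch using only the "packages" vector defined above:
#Optional sanity check: flag any package that failed to load
loaded<-sapply(packages, require, character.only = TRUE)
if(any(!loaded)) print(paste("Packages that failed to load:", paste(packages[!loaded], collapse = ", ")))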
This section contains two user-defined functions dedicated to the data import process
Any argument with a default value is optional
These functions have side effects 1
To import a single file, the user must pass the relative path2 of the file to the "file_import()" function's argument named "filepath"
To import multiple files, the user only has to pass the relative path3 of the folder containing the files to import to the "multiple_file_import()" function's argument named "datafolder_path"
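For illustration, a minimal call sketch (the file and folder names below are hypothetical):
#Importing a single file: creates the graph "g" and its adjacency matrix "G" in the global environment
file_import(filepath = "data/network_1.xlsx")
#Importing every file in a folder: creates "IGRAPHS", "Matrixes" and (with diag = TRUE, the default) "G_diag"
multiple_file_import(datafolder_path = "data")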
Creates a graph “g” and its adjacency matrix “G” from raw graph data in Excel or csv format (.xlsx, .xls or .csv file extensions).
file_import<- function(filepath,matrix_output_name="G",graph_output_name="g",convert=FALSE,directed=FALSE,weighted_adjacency=FALSE){
file_path<-filepath
#creating an object named 'dat', with the assignment operator, containing the data of the imported file
if(file_ext(file_path) == "csv"){
dat <- read.csv(file=file_path, header=TRUE, sep=",")
}
if(file_ext(file_path) == "xlsx" | file_ext(file_path) == "xls" ){
dat <- read_excel(file_path) }
if(convert == TRUE){
dat<-gather(dat,key=id1,value=id2,na.rm=TRUE) }
#convert the raw data into a graph
g<-graph_from_data_frame(d = dat,directed = directed, vertices =NULL)
assign((graph_output_name),g,envir=.GlobalEnv)
G<-get.adjacency(g, type = "both")
G<-as.matrix(G)
if(weighted_adjacency == TRUE & directed == FALSE){
G<-round(G/apply(G,1,sum),3) }
G[is.nan(G)] = 0
assign((matrix_output_name),G,envir=.GlobalEnv)
return(G)
}
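As a rough illustration of the "convert" argument, here is what the gather() call does to a toy wide-format table (the data below is purely hypothetical):
#Toy wide-format data: each column lists the contacts of one vertex
wide<-data.frame(A = c("B","C"), B = c("C", NA))
#gather() stacks the columns into a two-column edge list (id1 = column name, id2 = cell value), dropping NAs
gather(wide, key = id1, value = id2, na.rm = TRUE)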
Uses the raw graph data located in your "data" folder to create two list objects: "IGRAPHS", a list of graphs, and "Matrixes", a list of adjacency matrices.
multiple_file_import<- function(datafolder_path,convert=FALSE,directed=FALSE,weighted_adjacency=FALSE,diag=TRUE){
Data_path<-datafolder_path
flist <-list.files(path= Data_path, full.names=TRUE, recursive=FALSE)
flist<-lapply(flist, function(x) {
if(file_ext(x) == "csv"){
dat <- read.csv(file=x, header=TRUE, sep=",")
}
if(file_ext(x) == "xlsx" | file_ext(x) == "xls" ){
dat <- read_excel(x) }
if(convert == TRUE){
dat<-gather(dat,key=id1,value=id2,na.rm=TRUE) }
dat
})
IGRAPHS<-lapply(flist,function(x){g<-graph_from_data_frame(d = x,directed = directed, vertices =NULL)
})
assign(("IGRAPHS"),IGRAPHS,envir=.GlobalEnv)
Matrixes<-lapply(flist,function(x){
g<-graph_from_data_frame(d = x,directed = directed, vertices =NULL)
g<-simplify(g)
G<-get.adjacency(g)
G<-as.matrix(G)
})
if(weighted_adjacency == TRUE & directed == FALSE){
Matrixes<-lapply(flist,function(x){
g<-graph_from_data_frame(d = x,directed = directed, vertices =NULL)
g<-simplify(g)
G<-get.adjacency(g)
G<-as.matrix(G)
G<-round(G/apply(G,1,sum),3)
#Rows without any edge produce NaNs after the division; set them back to 0
G[is.nan(G)] = 0
G
})
}
if(diag==TRUE){
G_diag<-bdiag(Matrixes)
G_diag<-as.matrix(G_diag)
assign(("G_diag"),G_diag,envir=.GlobalEnv)}
assign(("Matrixes"),Matrixes,envir=.GlobalEnv)
}
Creating a set of custom functions
Any argument with a default value is optional
Sets a matrix's NaN entries to zero; only used in an older data import process
Made obsolete by the igraph functions "graph_from_data_frame()" and "get.adjacency()"
fix_nan <- function(x){
x[is.nan(x)] <- 0
x
}
Generates an adjacency matrix given its dimension (number of vertices) and the probability d of drawing an edge between two arbitrary vertices; only used in an older data import process
Made obsolete by igraph "erdos.renyi.game()" function
G_gen<-function(dimension,d){
a<-matrix(0, dimension, dimension)
for(i in 1:nrow(a)){
for(j in 1:ncol(a)){
a[i, j] <- sample(c(0,1),size=1,replace=TRUE,prob= c(1-d,d))
}
}
diag(a)<-0
a<-round(a/apply(a,1,sum),3)
fix_nan(a)
}
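For reference, a minimal call sketch (the dimension and probability below are arbitrary):
#A 5 x 5 row-normalized random adjacency matrix with edge probability 0.3
G_gen(dimension = 5, d = 0.3)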
Computes the density of a graph from the associated adjacency matrix
Made obsolete by igraph "edge_density() " function
Density<-function(G) {
E=0
for(i in 1:nrow(G)) {
for(j in 1:ncol(G)) {
if (G[i,j] != 0){
E=E+1
}
}
}
# E<-2*E would only be needed if an undirected graph were stored as a triangular (one-direction) matrix; a symmetric matrix already counts each edge twice
#the number of cells in G, minus the diagonal of 'impossible relationships' between an individual and themselves.
V<- (nrow(G) * (nrow(G)-1))
density<- E / (V)
return(density)
}
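As a quick sanity check (a sketch, assuming the packages above are loaded; "g_check" and "G_check" are hypothetical names), the legacy Density() should agree with igraph's edge_density() on the same graph:
g_check<-erdos.renyi.game(50, 0.1)
G_check<-as.matrix(get.adjacency(g_check))
Density(G_check)
edge_density(g_check)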
The function computes graph-related summary statistics and puts the results into a data frame
graph_summary_stats<-function(graph_set=IGRAPHS,single_graph=NULL){
if(is.null(single_graph) == FALSE){
diameter<-diameter(single_graph)
density<- edge_density(single_graph)
transitivity<-transitivity(graph = single_graph)
vertices<-length(V(single_graph))
edges<-gsize(single_graph)
Summary<-t(data.frame( Value=c(vertices,edges,transitivity,density,diameter)))
colnames(Summary)<-c('Number of vertices', 'Number of edges ','Transitivity','Density','Diameter')
return(Summary)
}
if(is.null(single_graph) == TRUE){
Diameter<-lapply(graph_set,function(x){diameter(graph = x)})
Diameter<-as.numeric(Diameter)
avg_diameter<-mean(Diameter)
Density<-lapply(graph_set,function(x){edge_density(graph = x)})
Density<-as.numeric(Density)
avg_density<-mean(Density)
Transitivity<-lapply(graph_set,function(x){transitivity(graph = x)})
#Average transitivity
Transitivity<-as.numeric(Transitivity)
avg_transitivity<-mean(Transitivity)
v<-lapply(graph_set,function(x){length(V(x))})
v<-as.numeric(v)
avg_vertices<-mean(v)
e<-lapply(graph_set,function(x){gsize(x)})
e<-as.numeric(e)
avg_edges<-mean(e)
a<-data.frame( avg_vertices,avg_edges,avg_transitivity,avg_density,avg_diameter)
colnames(a)<-c('Number of vertices', 'Number of edges ','Transitivity','Density','Diameter')
rownames(a)<-"Average"
Summary_avg<-data.frame( Ve=v,ed=e,tr=Transitivity,de=Density,di=Diameter)
colnames(Summary_avg)<-c('Number of vertices', 'Number of edges ','Transitivity','Density','Diameter')
#"files_name" must be a character vector (one label per graph) already present in the global environment
rownames(Summary_avg)<-c(files_name)
Summary_avg<-rbind(a,Summary_avg)
return(Summary_avg)
}
}
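A usage sketch for the multi-graph case; it assumes that "IGRAPHS" has been created by multiple_file_import() and that a character vector "files_name" (one label per imported file) exists in the global environment. The labels below are hypothetical:
#Labels used as row names of the summary table
files_name<-c("network_1","network_2")
#Summary statistics for every graph in "IGRAPHS", plus a row of averages
graph_summary_stats(graph_set = IGRAPHS)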
This function sets vertex colors according to their relative5 degree and vertex sizes according to their betweenness centrality
igraph_custom_plotting<-function(graph,mode=c("out","in","total"),plotType=c("plot","tkplot","rglplot")){
#Selecting a single value for the "mode" and "plotType" arguments (the first choice by default)
mode<-match.arg(mode)
plotType<-match.arg(plotType)
#Calculating the maximum degree in the graph
max<- max(degree(graph,mode=mode))
#Looping over each vertex in order to set its color according to its relative degree.
for(i in 1:vcount(graph)) {
if(degree(graph,i,mode=mode)/max == 0){
V(graph)[i]$color<-"white"}
else if (degree(graph,i,mode=mode)/max > 0 & degree(graph,i,mode=mode)/max <0.2){
V(graph)[i]$color<-"blue"}
else if (degree(graph,i,mode=mode)/max >= 0.2 & degree(graph,i,mode=mode)/max <0.4){
V(graph)[i]$color<-"yellow"}
else if (degree(graph,i,mode=mode)/max >= 0.4 & degree(graph,i,mode=mode)/max <0.6){
V(graph)[i]$color<-"orange"}
else if (degree(graph,i,mode=mode)/max >= 0.6 & degree(graph,i,mode=mode)/max <0.8){
V(graph)[i]$color<-"orangered1"}
else if (degree(graph,i,mode=mode)/max >= 0.8){
V(graph)[i]$color<-"red3"}
}
#Computing the betweenness centrality of each vertex.
cen<-betweenness(graph)
#Plot output conditional on the "plotType" argument.
#Regular plot
if(plotType == "plot"){
plot(graph,vertex.size=(cen/43+8),layout=layout_nicely(graph),vertex.label.cex=0.5)}
#Interactive plot (recommended)
if(plotType == "tkplot"){
tkplot(graph,vertex.size=(cen/43+8),layout=layout_nicely(graph),vertex.label.cex=0.5)}
#experimental 3D plot
if(plotType == "rglplot"){
rglplot(graph,vertex.size=(cen/43+8),layout=layout_nicely(graph),vertex.label.cex=0.5)}
}
This function has side effects
This function runs a Monte Carlo simulation based on the one described in “Identification of peer effects through social networks” 6 by Bramoullé, Djebbari and Fortin.
Function structure
This function is essentially a wrapper around four Monte Carlo routines. Hence, it is mainly made of four blocks.
In order: multiple == FALSE & fixed_effects == TRUE; multiple == FALSE & fixed_effects == FALSE; multiple == TRUE & fixed_effects == TRUE; multiple == TRUE & fixed_effects == FALSE
The function's arguments determine which block is executed (see the calls sketched below).
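To make the four blocks concrete, these are the corresponding calls (a sketch; "G" and "Matrixes" are assumed to already exist and n = 100 is arbitrary):
Monte_carlo(G = G, n = 100, multiple = FALSE, fixed_effects = TRUE)
Monte_carlo(G = G, n = 100, multiple = FALSE, fixed_effects = FALSE)
Monte_carlo(n = 100, multiple = TRUE, fixed_effects = TRUE) #uses the global object "Matrixes"
Monte_carlo(n = 100, multiple = TRUE, fixed_effects = FALSE)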
Code commentary
In the code commentary, both “appropriate size” and “m” refer to the number of vertices of the graph. This can be calculated either by ‘nrow(G)’ or ‘ncol(G)’, the number of rows or columns of the adjacency matrix G.
Since the function is a wrapper, the code is very repetitive and most lines are only commented on their first occurrence
x generation
The x vector of characteristics is generated as in the paper, i.e.:
1. use a Bernoulli distribution to set some entries of x to zero, with probability p = 0.0542 of being zero (i.e. a probability of 0.9458 of being nonzero)
2. for the nonzero entries, draw a value from a normal distribution truncated at zero, with mean 1 and standard deviation 3 (illustrated in the sketch below).
However, the paper uses a log-normal distribution, which can lead to inconsistent results7 in R
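The two steps above follow the same logic as the corresponding lines inside Monte_carlo(), written here in vectorized form (a sketch; G is assumed to be an existing adjacency matrix):
#Step 1: Bernoulli draw, each entry is 0 with probability 0.0542
x_sim<-matrix(rbinom(n = nrow(G), size = 1, prob = 0.9458), nrow(G), 1)
#Step 2: replace the nonzero entries with draws from a normal distribution truncated at 0 (mean 1, sd 3)
x_sim[x_sim != 0]<-rtruncnorm(n = sum(x_sim != 0), a = 0, b = 1000, mean = 1, sd = 3)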
Data requirement
Obviously, if multiple = TRUE the function requires the object "Matrixes" (obtained by running the multiple_file_import() function), while if multiple = FALSE only one adjacency matrix is needed (either the adjacency matrix of a single graph, obtainable via the file_import() function, or the block-diagonal matrix G_diag, obtainable via multiple_file_import() with diag = TRUE). Check section 2 on the data import process for more details
Omitted adjacency matrices when multiple = TRUE
When running the simulation on multiple files, an object "omitted_matrixes" is created, indicating how many matrices have been removed from the simulation because (I - beta * G) is not invertible
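For instance, after a run such as Monte_carlo(n = 100, multiple = TRUE, fixed_effects = TRUE), the counter can be inspected directly:
#Number of adjacency matrices skipped because (I - beta * G) was singular
omitted_matrixes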
Main equations
Here are the LaTeX equivalents of the main equations and objects used inside the function
\[\textrm{Inv = } \; (I - \beta G)^{-1} \qquad \textrm{P =} \quad S(S'S)^{-1}S'\] \[ \textrm{IGy = } \; (I - \beta G)^{-1} \ (\gamma I \ + \ \delta G) (I-G)X \ + \ (I - \beta G)^{-1} (I-G) \ \epsilon \qquad \textrm{with fixed effects } \qquad\; \textrm{y = } \; \alpha (I - \beta G)^{-1} l \ + \ (I - \beta G)^{-1} \ (\gamma I \ + \ \delta G) X \ + \ (I - \beta G)^{-1} \epsilon \ \ {\scriptsize (l \ being \ a \ column \ vector \ of \ 1)} \;\; \qquad \textrm{ otherwise } \]
\[\textrm{S = } \; [ \ (I-G)X \quad\; (I-G)GX \quad\; (I-G)G^2X \ ] \qquad \textrm{with fixed effects } \qquad\; \textrm{S = } \; [ \ l \quad\; X \quad\; GX \quad\; G^2X \ ] \;\; \qquad \textrm{ otherwise } \] \[\textrm{X_t = } \; \widetilde X=[ \ (I-G)Gy \quad\; (I-G)X \quad\;(I-G)GX \ ] \qquad \textrm{with fixed effects } \qquad\; \textrm{X_t = } \; [ \ l \quad\;Gy \quad\; X \quad\;GX \ ] \;\; \qquad \textrm{ otherwise } \] \[\textrm{Z_t = } \; \widehat Z = [ \ (I-G)Gy( \theta) \quad\; (I-G)X \quad\;(I-G)GX \ ] \qquad \textrm{with fixed effects } \qquad\; \textrm{Z_t = } \; [ \ l \quad\;Gy( \theta) \quad\; X \quad\;GX \ ] \;\; \qquad \textrm{ otherwise } \] \[\textrm{th_lee_1 = } \; ( \widehat Z' \widetilde X)^{-1} \widehat Z'y \qquad \textrm{var_th_lee =} \quad (\widehat Z' \widetilde X)^{-1} \widehat Z' D \widehat Z (\widetilde X'\widehat Z )^{-1} \]
Monte_carlo<-function(G=NULL,n,e_var=0.1,multiple=FALSE,fixed_effects=TRUE,alpha=0.7683,beta=0.4666,gamma=0.0834,delta=0.1507){
#Initializing the variance list
variances_list<-list()
if(multiple == FALSE){
#Drawing a vector x of characteristics
x_sim<-matrix(rbinom(n = nrow(G),size = 1,prob = 0.9458 ),nrow(G),1)
for(i in 1:nrow(x_sim)){
if(x_sim[i,] != 0){
x_sim[i,]<-rtruncnorm(n = 1,a = 0,b = 1000,mean = 1,sd = 3)
}
}
#a vector filled with 1, size m x 1 (used when there is an intercept in the model, i.e. when fixed_effects = FALSE)
l<-matrix(1,nrow(G),1)
GX<-G %*% x_sim
G2X<-(G %^% 2) %*% x_sim
#an identity matrix of appropriate size
I<-(diag(nrow(G)))
# Check that (I - beta * G) is invertible before inverting it
# (solve() would throw an error on a singular matrix)
if(is.singular.matrix(I - beta * G)==TRUE){
stop("The object (I - beta * G) is singular, i.e. it cannot be inverted. Hence the Monte Carlo simulation cannot be run.")
}
if(is.singular.matrix(I - beta * G)==FALSE){
# Inv corresponds to (I - beta*G)^(-1) in the reduced form (check equation (5))
# solve() gives the inverse of a matrix
Inv<-solve(I - beta * G)
if(fixed_effects == TRUE){
thetas<-matrix(,3,0)
IG <-(I - G)
#the instrument matrix S, of size m x 3
S<-matrix(c(IG%*%x_sim,IG%*%GX,IG%*%G2X),nrow(G),)
#P is the weighting matrix of size m x m
P<-S %*% solve(t(S) %*% S) %*% t(S)
#Loop with n iterations to randomly generate n epsilons
for(i in 1:n){
#generating an epsilon vector with mean 0 and standard deviation e_var (0.1 by default)
eps<-matrix(rnorm(n = nrow(G),mean = 0,sd = e_var),nrow(G),1)
IGy<-Inv %*% (gamma*I + delta * G) %*% IG %*% x_sim + Inv %*% IG %*% eps
#X tilde, size m x 3
X_t<-matrix(c(G%*%IGy ,IG%*%x_sim,IG%*%GX),nrow(x_sim),)
#theta 2sls and extracting its parameters
th_2sls <-solve(t(X_t) %*% P %*% X_t) %*% t(X_t) %*% P %*% IGy
beta_2sls<-th_2sls[1]
gamma_2sls<-th_2sls[2]
delta_2sls<-th_2sls[3]
#Recalculate Inv with beta_2sls
Inv_2sls<-solve(I - beta_2sls * G )
#IGy estimated in theta 2sls
IGy_2sls<-Inv_2sls %*% (gamma_2sls * I + delta_2sls * G) %*% IG %*% x_sim + Inv_2sls %*% IG %*% eps
IG_Gy_2sls<- G %*% Inv_2sls %*% (IG %*% (x_sim * gamma_2sls + GX* delta_2sls))
Z_th<-matrix(c(IG_Gy_2sls,IG%*%x_sim,IG%*%GX),nrow(G),)
#THETAS
#theta lee
th_lee_1<-solve(t(Z_th) %*% X_t) %*% t(Z_th) %*% IGy
#extracting theta lee 's parameters
beta_lee<-th_lee_1[1]
gamma_lee<-th_lee_1[2]
delta_lee<-th_lee_1[3]
#Recalculate Inv with Beta_lee
Inv_lee<-solve(I - beta_lee * G )
#cbind allows to add a column to an already existing matrix
#(here, thetas, initially of size 3 x 0)
#each loop iteration adds up one column of estimated parameters to the matrix
#when the loop ends, thetas is of size 3 x n, i.e. it contains n theta vectors,
#each of them estimated with a different epsilon.
thetas<-cbind(thetas,th_lee_1)
#VARIANCES
#Calculating the square residuals
IGy_lee<-Inv_lee %*% (gamma_lee*I + delta_lee * G) %*% IG %*% x_sim + Inv_lee %*% IG %*% eps
squared_residuals <- (IGy-IGy_lee)^2
#Creating a diagonal matrix of appropriate size filled with squared residuals
D<-Diagonal(nrow(G),squared_residuals)
D<-as(D,"matrix")
#Calculating the variance
var_th_lee<-solve(t(Z_th) %*% X_t) %*% t(Z_th) %*% D %*% Z_th %*% solve(t(X_t) %*% Z_th)
#Adding each calculated variance into the variances list
variances_list[[i]]<-var_th_lee
}
}
if(fixed_effects == FALSE){
thetas<-matrix(,4,0)
#the instrument matrix S, of size m x 4
S<-matrix(c(l,x_sim,GX,G2X),nrow(x_sim),)
#P is the weighting matrix of size m x m
P<-S %*% solve(t(S) %*% S) %*% t(S)
for(i in 1:n){
eps<-matrix(rnorm(n = nrow(G),mean = 0,sd = e_var),nrow(G),1)
y<-alpha * Inv %*% l + Inv %*% (gamma * I + delta * G) %*% x_sim + Inv %*% eps
Gy<-G %*% y
#X tilde, size m x 4
X_t<-matrix(c(l,Gy,x_sim,GX),nrow(x_sim),)
#theta 2sls and extracting its parameters
th_2sls <-solve(t(X_t) %*% P %*% X_t) %*% t(X_t) %*% P %*% y
alpha_2sls<-th_2sls[1]
beta_2sls<-th_2sls[2]
gamma_2sls<-th_2sls[3]
delta_2sls<-th_2sls[4]
#Recalculate I with Beta_2sls
I_2sls<-solve((diag(nrow(G)) - beta_2sls * G ))
#Gy estimated in theta 2sls
gy_2sls<- G %*% I_2sls %*% (alpha_2sls * l + gamma_2sls * x_sim + GX * delta_2sls)
Z_th<-matrix(c(rep(1,nrow(G)),gy_2sls,x_sim,GX),nrow(x_sim),)
#THETAS
#theta lee
th_lee_1<-solve(t(Z_th) %*% X_t) %*% t(Z_th) %*% y
#extracting theta lee 's parameters
alpha_lee<-th_lee_1[1]
beta_lee<-th_lee_1[2]
gamma_lee<-th_lee_1[3]
delta_lee<-th_lee_1[4]
#Recalculate I with Beta_lee
I_lee<-solve((diag(nrow(G)) - beta_lee * G ))
#cbind allows to add a column to an already existing matrix
#(here, thetas, initially of size 4 x 0)
#each loop iteration adds up one column of estimated parameters to the matrix
#when the loop ends, thetas is of size 4 x n, i.e. it contains n theta vectors,
#each of them estimated with a different epsilon.
thetas<-cbind(thetas,th_lee_1)
#VARIANCES
#Calculating the squared residuals
y_lee<-alpha_lee * I_lee %*% l + I_lee %*% (gamma_lee * I + delta_lee * G) %*% x_sim
squared_residuals <- (y-y_lee)^2
#Creating a diagonal matrix of appropriate size filled with squared residuals
D<-Diagonal(nrow(G),squared_residuals)
D<-as(D,"matrix")
#Calculating the variance
var_th_lee<-solve(t(Z_th) %*% X_t) %*% t(Z_th) %*% D %*% Z_th %*% solve(t(X_t) %*% Z_th)
#Adding each calculated variance into the variances list
variances_list[[i]]<-var_th_lee
}
}
}
}
if(multiple ==TRUE){
z=0
#Counter indexing Monte Carlo draws across all retained matrices
k=0
#Initializing the thetas container before the loop so that estimates accumulate over all matrices
if(fixed_effects == TRUE){thetas<-matrix(,3,0)}
if(fixed_effects == FALSE){thetas<-matrix(,4,0)}
for(j in 1:length(Matrixes)){
G<-Matrixes[[j]]
I<-(diag(nrow(G)))
#Skip matrices for which (I - beta * G) is singular, counting them in z
if(is.singular.matrix(I - beta * G)==TRUE){
z=z+1
}
if(is.singular.matrix(I - beta * G)==FALSE){
Inv<-solve(I - beta * G )
x_sim<-matrix(rbinom(n = nrow(G),size = 1,prob = 0.9458 ),nrow(G),1)
for(i in 1:nrow(x_sim)){
if(x_sim[i,] != 0){
x_sim[i,]<-rtruncnorm(n = 1,a = 0,b = 1000,mean = 1,sd = 3)
}
}
l<-matrix(1,nrow(G),1)
GX<-G %*% x_sim
G2X<-(G %^% 2) %*% x_sim
if(fixed_effects == TRUE){
IG <-(I - G)
S<-matrix(c(IG%*%x_sim,IG%*%GX,IG%*%G2X),nrow(G),)
P<-S %*% solve(t(S) %*% S) %*% t(S)
for(i in 1:n){
eps<-matrix(rnorm(n = nrow(G),mean = 0,sd = e_var),nrow(G),1)
IGy<-Inv %*% (gamma*I + delta * G) %*% IG %*% x_sim + Inv %*% IG %*% eps
X_t<-matrix(c(G%*%IGy ,IG%*%x_sim,IG%*%GX),nrow(x_sim),)
th_2sls <-solve(t(X_t) %*% P %*% X_t) %*% t(X_t) %*% P %*% IGy
beta_2sls<-th_2sls[1]
gamma_2sls<-th_2sls[2]
delta_2sls<-th_2sls[3]
Inv_2sls<-solve(I - beta_2sls * G )
IGy_2sls<-Inv_2sls %*% (gamma_2sls * I + delta_2sls * G) %*% IG %*% x_sim + Inv_2sls %*% IG %*% eps
IG_Gy_2sls<- G %*% Inv_2sls %*% (IG %*% (x_sim * gamma_2sls + GX* delta_2sls))
Z_th<-matrix(c(IG_Gy_2sls,IG%*%x_sim,IG%*%GX),nrow(G),)
#THETAS
#theta lee
th_lee_1<-solve(t(Z_th) %*% X_t) %*% t(Z_th) %*% IGy
#extracting theta lee 's parameters
beta_lee<-th_lee_1[1]
gamma_lee<-th_lee_1[2]
delta_lee<-th_lee_1[3]
#Recalculate Inv with Beta_lee
Inv_lee<-solve(I - beta_lee * G )
thetas<-cbind(thetas,th_lee_1)
#Calculating the variance
#Calculating the square residuals
IGy_lee<-Inv_lee %*% (gamma_lee*I + delta_lee * G) %*% IG %*% x_sim + Inv_lee %*% IG %*% eps
squared_residuals <- (IGy-IGy_lee)^2
#Creating a diagonal matrix of appropriate size filled with squared residuals
D<-Diagonal(nrow(G),squared_residuals)
D<-as(D,"matrix")
#Calculating the variance
var_th_lee<-solve(t(Z_th) %*% X_t) %*% t(Z_th) %*% D %*% Z_th %*% solve(t(X_t) %*% Z_th)
#Adding each calculated variance into the variances list
k=k+1
variances_list[[k]]<-var_th_lee
}
}
if(fixed_effects == FALSE){
S<-matrix(c(l,x_sim,GX,G2X),nrow(x_sim),)
P<-S %*% solve(t(S) %*% S) %*% t(S)
for(i in 1:n){
eps<-matrix(rnorm(n = nrow(G),mean = 0,sd = e_var),nrow(G),1)
y<-alpha * Inv %*% l + Inv %*% (gamma * I + delta * G) %*% x_sim + Inv %*% eps
Gy<-G %*% y
X_t<-matrix(c(l,Gy,x_sim,GX),nrow(x_sim),)
th_2sls <-solve(t(X_t) %*% P %*% X_t) %*% t(X_t) %*% P %*% y
alpha_2sls<-th_2sls[1]
beta_2sls<-th_2sls[2]
gamma_2sls<-th_2sls[3]
delta_2sls<-th_2sls[4]
I_2sls<-solve((diag(nrow(G)) - beta_2sls * G ))
gy_2sls<- G %*% I_2sls %*% (alpha_2sls * l + gamma_2sls * x_sim + GX * delta_2sls)
Z_th<-matrix(c(rep(1,nrow(G)),gy_2sls,x_sim,GX),nrow(x_sim),)
#THETAS
#theta lee
th_lee_1<-solve(t(Z_th) %*% X_t) %*% t(Z_th) %*% y
#extracting theta lee 's parameters
alpha_lee<-th_lee_1[1]
beta_lee<-th_lee_1[2]
gamma_lee<-th_lee_1[3]
delta_lee<-th_lee_1[4]
I_lee<-solve((diag(nrow(G)) - beta_lee * G ))
thetas<-cbind(thetas,th_lee_1)
y_lee<-alpha_lee * I_lee %*% l + I_lee %*% (gamma_lee * I + delta_lee * G) %*% x_sim
squared_residuals <- (y-y_lee)^2
D<-Diagonal(nrow(G),squared_residuals)
D<-as(D,"matrix")
var_th_lee<-solve(t(Z_th) %*% X_t) %*% t(Z_th) %*% D %*% Z_th %*% solve(t(X_t) %*% Z_th)
k=k+1
variances_list[[k]]<-var_th_lee
}
}
}
}
assign(("omitted_matrixes"),z,envir=.GlobalEnv)
}
# Results ####
#Variance
#Computing the average variance-covariance matrix
Y <- do.call(cbind,variances_list)
Y <- array(Y, dim=c(dim(variances_list[[1]]), length(variances_list)))
#Creates an average variance-covariance matrix named "variance"
variance<-apply(Y, c(1, 2), mean, na.rm = TRUE)
if(fixed_effects == TRUE){
rownames(variance)<-c("beta","gamma","delta")
colnames(variance)<-c("beta","gamma","delta")
}
if(fixed_effects == FALSE){
rownames(variance)<-c("alpha","beta","gamma","delta")
colnames(variance)<-c("alpha","beta","gamma","delta")
}
assign(("variance"),variance,envir=.GlobalEnv)
#Parameters estimation
# rowMeans(thetas) calculates the mean of each parameter over the n generated theta vectors (when multiple = FALSE) or over the n x number of retained matrices theta vectors (when multiple = TRUE), to get the average estimates
estimates<-rowMeans(thetas)
estimates<-as.data.frame(estimates)
if(fixed_effects == TRUE){
rownames(estimates)<-c("beta","gamma","delta")
}
if(fixed_effects == FALSE){
rownames(estimates)<-c("alpha","beta","gamma","delta")
}
return(estimates)
}
#Generating a graph with 100 vertices and a link probability of 0.04 with the "erdos.renyi.game()" function
g<-erdos.renyi.game(100,0.04)
#Generating the associated adjacency matrix
G<-get.adjacency(g)
G<-as.matrix(G)
graph_summary_stats(single_graph=g)
Number of vertices Number of edges Transitivity Density Diameter
Value 100 189 0.03829787 0.03818182 6
igraph_custom_plotting(g,mode = "out",plotType = "plot")
Monte_carlo(G = G,n = 100,multiple = FALSE, fixed_effects = TRUE)
estimates
beta 0.46453832
gamma 0.08123747
delta 0.14972314
Visualizing the associated variance-covariance matrix
variance
beta gamma delta
beta 73.40190 66.741200 34.850890
gamma 66.74120 216.797261 3.245542
delta 34.85089 3.245542 25.974420
side effects : https://en.wikipedia.org/wiki/Side_effect_(computer_science)
In our functions, the side effects consist in creating global variable(s) when running the function↩︎
relative path refers to the file location relative to the working directory↩︎
relative path refers to the file location relative to the working directory↩︎
https://stackoverflow.com/questions/23956467/what-is-the-difference-between-a-directed-and-undirected-graph↩︎
relative: each vertex degree is normalized to the highest degree in the graph. For instance, if the vertex with the highest degree in the graph has a degree of 10, then each vertex degree will be divided by 10.
Hence, the color indicates the relative degree of each vertex.↩︎
“Identification of peer effects through social networks” by Bramoullé, Djebbari and Fortin. https://www.sciencedirect.com/science/article/pii/S0304407609000335?via%3Dihub↩︎
https://msalganik.wordpress.com/2017/01/21/making-sense-of-the-rlnorm-function-in-r/↩︎
Note that in this case only 3 parameters are estimated since the intercept, \(\alpha\) , is then captured by the network fixed effect.↩︎