Useful commands or R Markdown Cheat Sheet
Ignacio et al. (2022) Banyard, Hamby, and Grych (2017) Cai et al. (2024) Cano et al. (2020) Cromer et al. (2019) Hong et al. (2018) Ferber and Weller (2022) Callaghan et al. (2019) Chan et al. (2023) Weidacker et al. (2022) Hirshfeld-Becker et al. (2019) Gur et al. (2020) Bram, Gottschalk, and Leeds (2018) Wymbs et al. (2020) Bernstein and McNally (2018) Gildawie, Honeycutt, and Brenhouse (2020) Kuhlman et al. (2023) Kayser et al. (2019) Dvorsky et al. (2019) Kirby et al. (2022) Murphy et al. (2017) Carter, Powers, and Bradley (2020) Ramaiya et al. (2018) Sendzik et al. (2017) Wu, Slesnick, and Murnan (2018) Cornwell et al. (2024) Skinner et al. (2020) Gibbons and Bouldin (2019) Rodman et al. (2019) Higheagle Strong et al. (2020) McRae et al. (2017) Martinez Jr et al. (2022) Vannucci et al. (2019) Noroña-Zhou and Tung (2021) Motsan, Yirmiya, and Feldman (2022) Rich et al. (2019) Krause et al. (2018) Sui et al. (2020) Grych et al. (2020) Vega-Torres et al. (2020) Kliewer and Parham (2019) Griffith, Farrell-Rosen, and Hankin (2023) Siciliano et al. (2023) White et al. (2021) Bettis et al. (2019) Rudolph et al. (2024) Linke et al. (2020) Cornwell et al. (2023) Criss et al. (2017) Koban et al. (2017) Kashdan et al. (2020) Priel et al. (2020) Hannan et al. (2017) Malberg (2023) Tang, Tang, and Gross (2019) Caceres et al. (2024) Gupta, Dickey, and Kujawa (2022) Gee (2022) Khahra et al. (2024) Szoko et al. (2023) Barzilay et al. (2020) Jiang, Paley, and Shi (2022) Xiao et al. (2019) Finkelstein-Fox, Park, and Riley (2018) Sevinc et al. (2019) Blair et al. (2018) Buthmann et al. (2024) Schäfer et al. (2017) Smith and Jen’nan (2024) explored research focusing on various populations and areas to find the effects of several factors like living experience, environment, physical and mental health, and their relationship of emotion regulation and resilience development.
This is where the steps go
library(splitstackshape)
library(stringr)
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(networkD3)
library(magrittr)
library(htmlwidgets)
##
## Attaching package: 'htmlwidgets'
## The following object is masked from 'package:networkD3':
##
## JS
library(htmltools)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:igraph':
##
## groups
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(igraph)
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
a<-read.csv("/Users/jinzeyang/Desktop/Fall 2024/Social Networks Analysis/A_Delieverables/US emotion regulation.csv")
authors<-as.data.frame(a[,1])#"Author.s..ID"])
colnames(authors)<-"AU"
authors$AU <- str_remove_all(authors$AU, "[^[\\a-zA-Z ]]")
a1<-cSplit(authors, splitCols = "AU", sep = ";", direction = "wide", drop = TRUE)
class(a1)
## [1] "data.table" "data.frame"
mat <- as.matrix(a1)
dim(mat)
## [1] 70 16
mat <- cbind(a$EID, mat)
edgelist1<-matrix(NA, 1, 2)
for (i in 1:1) {
edgelist11 <- cbind(mat[, i], c(mat[, -c(1:i)]))
edgelist1 <- rbind(edgelist1,edgelist11)
edgelist1<-edgelist1[!is.na(edgelist1[,2]),]
edgelist1<-edgelist1[edgelist1[,2]!="",]
}
dim(edgelist1)
## [1] 407 2
head(edgelist1)
## [,1] [,2]
## [1,] "2-s2.0-85124618928" "Ignacio D.A."
## [2,] "2-s2.0-85010380267" "Banyard V."
## [3,] "2-s2.0-85195804072" "Cai Y."
## [4,] "2-s2.0-85078064575" "Cano M.."
## [5,] "2-s2.0-85066870128" "Cromer K.D."
## [6,] "2-s2.0-85054193257" "Hong F."
g<- graph.data.frame(edgelist1[, 2:1], directed = FALSE)
V(g)$type <- V(g)$name %in% edgelist1[ , 2]
table(V(g)$type)
##
## FALSE TRUE
## 70 376
i<-table(V(g)$type)[2]
V(g)$label<-V(g)$name
mat_g2_incidence <- t(get.incidence(g))
dim(mat_g2_incidence)
## [1] 376 70
dta <- data.frame(id=rownames(mat_g2_incidence), count=rowSums(mat_g2_incidence))
head(dta)
## id count
## Ignacio D.A. Ignacio D.A. 1
## Banyard V. Banyard V. 2
## Cai Y. Cai Y. 1
## Cano M.. Cano M.. 1
## Cromer K.D. Cromer K.D. 1
## Hong F. Hong F. 1
dta <- dta[order(dta$count, decreasing=T), ]
head(dta)
## id count
## Compas B.E. Compas B.E. 3
## Banyard V. Banyard V. 2
## Gur R.E. Gur R.E. 2
## Cornwell H. Cornwell H. 2
## Grych J. Grych J. 2
## Bettis A.H. Bettis A.H. 2
mat_g2_incidence_to_1 <- mat_g2_incidence%*%t(mat_g2_incidence)
diag(mat_g2_incidence_to_1)<-0
dim(mat_g2_incidence_to_1)
## [1] 376 376
table(colSums(mat_g2_incidence_to_1)==0)
##
## FALSE TRUE
## 375 1
table(rowSums(mat_g2_incidence_to_1)==0)
##
## FALSE TRUE
## 375 1
mat_g2_incidence_to_1 <- mat_g2_incidence_to_1 /rowSums(mat_g2_incidence_to_1)
summary(rowSums(mat_g2_incidence_to_1))
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1 1 1 1 1 1 1
# remain isolates, make it zero and get list weights
mat_g2_incidence_to_1[is.na(mat_g2_incidence_to_1)]<-0
listw<-mat2listw(mat_g2_incidence_to_1)
#Creating a dataset for tests, matching the information
pd <- data.frame(id=colnames(mat_g2_incidence_to_1))
head(pd)
## id
## 1 Ignacio D.A.
## 2 Banyard V.
## 3 Cai Y.
## 4 Cano M..
## 5 Cromer K.D.
## 6 Hong F.
head(colnames(mat_g2_incidence_to_1))
## [1] "Ignacio D.A." "Banyard V." "Cai Y." "Cano M.." "Cromer K.D."
## [6] "Hong F."
pd$pub<- dta$count[match(pd$id, dta$id)]
head(pd)
## id pub
## 1 Ignacio D.A. 1
## 2 Banyard V. 2
## 3 Cai Y. 1
## 4 Cano M.. 1
## 5 Cromer K.D. 1
## 6 Hong F. 1
pd$lag.pub <- round(lag.listw(listw, pd$pub, zero.policy=T, na.action=na.omit),3)
head(pd)
## id pub lag.pub
## 1 Ignacio D.A. 1 1.0
## 2 Banyard V. 2 1.8
## 3 Cai Y. 1 1.0
## 4 Cano M.. 1 1.0
## 5 Cromer K.D. 1 1.0
## 6 Hong F. 1 1.2
# Question 2:
moran.test(pd$pub, listw, zero.policy=TRUE)
##
## Moran I test under randomisation
##
## data: pd$pub
## weights: listw
## n reduced by no-neighbour observations
##
## Moran I statistic standard deviate = 11.928, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.397340340 -0.002673797 0.001124628
# Question 3:
mi <- moran.test(pd$pub, listw, zero.policy=TRUE) #PHUDCFILY
mp_math <- moran.plot(pd$pub, listw, labels=as.character(pd$id), zero.policy = TRUE)
mp_math$new_label <- paste("Author name: ", mp_math$labels, "<br>own pub record: ", pd$pub[match(mp_math$labels, pd$id)],
"<br>Peers' record: ", pd$lag.pub[match(mp_math$labels, pd$id)], sep="") #PHUDCFILY
mphu <-ggplot(mp_math, aes(x=jitter(x), y=jitter(wx), text=new_label)) + geom_point(shape=1, alpha=0) +
geom_hline(yintercept=mean(mp_math$wx), lty=2) +
geom_vline(xintercept=mean(mp_math$x), lty=2) + theme_minimal() +
geom_point(data=mp_math[(mp_math$wx>=mean(mp_math$wx)&mp_math$x>=mean(mp_math$x))&mp_math$is_inf==FALSE,], aes(x=x, y=wx), shape=1, alpha=.1) +
geom_point(data=mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx>=mean(mp_math$wx))&mp_math$is_inf==TRUE,], aes(x=x, y=wx), shape=9, alpha=.4) +
geom_point(data=mp_math[(mp_math$x<mean(mp_math$x)&mp_math$wx>=mean(mp_math$wx))&mp_math$is_inf==FALSE,], aes(x=x, y=wx), shape=1, alpha=.1) +
geom_point(data=mp_math[(mp_math$x<mean(mp_math$x)&mp_math$wx>=mean(mp_math$wx))&mp_math$is_inf==TRUE,], aes(x=x, y=wx), shape=9, alpha=.4) +
geom_point(data=mp_math[(mp_math$x<mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==FALSE,], aes(x=x, y=wx), shape=1, alpha=.1) +
geom_point(data=mp_math[(mp_math$x<mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,], aes(x=x, y=wx), shape=9, alpha=.4) +
geom_point(data=mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==FALSE&(mp_math$x-mp_math$wx<min(mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]$x-mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]$wx)),], aes(x=x, y=wx), shape=1, alpha=.3, position=position_jitter(h=0.1,w=0.1)) +
geom_point(data=mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==FALSE&(mp_math$x-mp_math$wx>=min(mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]$x-mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]$wx)),], aes(x=x, y=wx), shape=9, alpha=.8, colour=rgb(223, 255, 0, max=255, 255/1), position=position_jitter(h=0.1,w=0.1)) +
geom_point(data=mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,], aes(x=x, y=wx), shape=9, alpha=.8, colour=rgb(255, 0, 126, max=255, 255/1), position=position_jitter(h=0.1,w=0.1)) +
xlab("Authors' Own Publication count (y_i)") + ylab("Coauthors' Publication count (y_j)") + ggtitle("Peer effects",
subtitle = "") + labs(caption = "(PHUDCFILY)") +
theme(plot.title = element_text(color=rgb(255, 0, 126, max=255, 255/1), size=14, face="bold"),
plot.subtitle = element_text(color = "blue"))#
mphu
ggplotly(mphu, tooltip = c("new_label")) %>%
layout(title = list(text = paste0('<b>Peer effects on Publication Performance</b>',
'<br>',
'<sup style="color:black; font-weight:bold">',
paste(dim(mat_g2_incidence_to_1)[1],' authors in ', dim(mat_g2_incidence)[2],' Publications. Conservative: ', dim(mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,])[1], ' HL cases (pink), Liberal: ', dim(mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==FALSE&(mp_math$x-mp_math$wx>=round(min(mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]$x-mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]$wx),3)),])[1], ' HL cases (green) </sup>', sep=""),
'<span style="font-size:10px; font-weight:bold; color: rgb(255, 0, 126, max=255, 255/1)">',
paste("\nSource: Scopus. Moran's I = ", round(mi$estimate[1], 3)," P. Value ", ifelse(mi$p.value==0, "< 0.001", mi$p.value), ', Liberal threshold = ', round(min(mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]$x-mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]$wx),3), '</span>', sep=""))))
# get illustrations for pink points and green points
#Conservative
mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]
## x wx is_inf labels dfb.1_ dfb.x dffit cov.r cook.d
## 58 2 1 TRUE Gee D.G. 0.3482164 -0.3920042 -0.4104217 1.00748 0.08324622
## 87 2 1 TRUE Kofler M.J. 0.3482164 -0.3920042 -0.4104217 1.00748 0.08324622
## 164 2 1 TRUE Feldman R. 0.3482164 -0.3920042 -0.4104217 1.00748 0.08324622
## 181 2 1 TRUE Gross J.J. 0.3482164 -0.3920042 -0.4104217 1.00748 0.08324622
## 198 2 1 TRUE Liu S. 0.3482164 -0.3920042 -0.4104217 1.00748 0.08324622
## hat
## 58 0.03031362
## 87 0.03031362
## 164 0.03031362
## 181 0.03031362
## 198 0.03031362
## new_label
## 58 Author name: Gee D.G.<br>own pub record: 2<br>Peers' record: 1
## 87 Author name: Kofler M.J.<br>own pub record: 2<br>Peers' record: 1
## 164 Author name: Feldman R.<br>own pub record: 2<br>Peers' record: 1
## 181 Author name: Gross J.J.<br>own pub record: 2<br>Peers' record: 1
## 198 Author name: Liu S.<br>own pub record: 2<br>Peers' record: 1
# Liberal
mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==FALSE&(mp_math$x-mp_math$wx>=round(min(mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]$x-mp_math[(mp_math$x>=mean(mp_math$x)&mp_math$wx<mean(mp_math$wx))&mp_math$is_inf==TRUE,]$wx),3)),]
## [1] x wx is_inf labels dfb.1_ dfb.x dffit
## [8] cov.r cook.d hat new_label
## <0 rows> (or 0-length row.names)
# Question4:
# I tried order =3 and above, but the R shows error that
# "sp.correlogram: too few included observations in higher lags: reduce order", so I chose order =2
sp.correlogram(listw[[2]], pd$pub, order = 2, method = "I", zero.policy=T) # calculate social correlogram for autocorrelations
## Spatial correlogram for pd$pub
## method: Moran's I
## estimate expectation variance standard deviate Pr(I) two sided
## 1 (375) 0.3036855 -0.0026738 0.0011236 9.1394 <2e-16 ***
## 2 (81) 0.0089194 -0.0125000 0.0041642 0.3319 0.7399
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot.spcor(sp.correlogram(listw[[2]], pd$pub, order = 2, method = "I", zero.policy=T), xlab = "Social lags", main = "Social correlogram: Autocorrelation with CIs")