Designed for COMM621 Quantitative Research Methods Graduate Seminar.
Before running this script, make sure the following libraries are installed; the calls below load them.
library(bibliometrix) #the library for bibliometrics
library(quanteda) #a library for quantitative text analysis
library(dplyr) #for data munging
require(ggplot2) #visualization
require(lubridate)
require(topicmodels) #for topic modeling
Use saved search results from the Web of Science database (http://apps.webofknowledge.com/) for the journal Nonprofit and Voluntary Sector Quarterly. More than 900 results were returned, and they are saved in two BibTeX files. The bibliometrix library can convert BibTeX files into data frames. (COMM621 students: download the BibTeX files from Moodle.)
# Read the two BibTeX exports, convert each into a bibliographic data frame,
# and stack the results. The "##" lines are the progress messages recorded
# when each conversion was run.
A1 <- readFiles("NVSQ_part1.bib")
A2 <- readFiles("NVSQ_part2.bib")
M1 <- convert2df(file = A1, dbsource = "isi", format = "bibtex")
## Articles extracted 100
## Articles extracted 200
## Articles extracted 300
## Articles extracted 400
## Articles extracted 500
M2 <- convert2df(file = A2, dbsource = "isi", format = "bibtex")
## Articles extracted 100
## Articles extracted 200
## Articles extracted 300
## Articles extracted 400
## Articles extracted 421
# Merge the two converted data frames into a single collection.
M <- rbind(M1, M2)
Generate a descriptive analysis of the references.
# Run the descriptive bibliometric analysis on the merged collection, then
# summarise it with k = 20 rows per table and no pause between sections.
results <- biblioAnalysis(M, sep = ";")
S <- summary(results, k = 20, pause = FALSE)
##
##
## Main Information about data
##
## Articles 921
## Sources (Journals, Books, etc.) 1
## Keywords Plus (ID) 1250
## Author's Keywords (DE) 2010
## Period 1994 - 2018
## Average citations per article 16.6
##
## Authors 1404
## Author Appearances 1828
## Authors of single authored articles 237
## Authors of multi authored articles 1167
##
## Articles per Author 0.656
## Authors per Article 1.52
## Co-Authors per Articles 1.98
## Collaboration Index 2.1
##
##
## Annual Scientific Production
##
## Year Articles
## 1994 10
## 1995 21
## 1996 24
## 1997 22
## 1998 20
## 1999 22
## 2000 26
## 2001 37
## 2002 25
## 2003 29
## 2004 47
## 2005 21
## 2006 32
## 2007 36
## 2008 41
## 2009 47
## 2010 52
## 2011 51
## 2012 49
## 2013 51
## 2014 58
## 2015 56
## 2016 74
## 2017 59
## 2018 11
##
## Annual Percentage Growth Rate 0.3979153
##
##
## Most Productive Authors
##
## Authors Articles Authors Articles Fractionalized
## 1 HANDY F 11 HANDY F 5.17
## 2 ABZUG R 10 ABZUG R 4.83
## 3 GUO C 9 GAZLEY B 4.67
## 4 SCHERVISH PG 8 SCHERVISH PG 4.67
## 5 BRUDNEY JL 7 GUO C 4.25
## 6 CNAAN RA 7 NESBIT R 4.17
## 7 GAZLEY B 7 SCHNEIDER JA 4.00
## 8 GRONBJERG KA 6 SMITH DH 4.00
## 9 NESBIT R 6 HAGER MA 3.75
## 10 ROONEY PM 6 GRONBJERG KA 3.33
## 11 BIELEFELD W 5 BRUDNEY JL 3.08
## 12 HAGER MA 5 BROOKS AC 3.00
## 13 LIU G 5 KIM M 3.00
## 14 LUKSETICH W 5 OSTROWER F 3.00
## 15 QUARTER J 5 TANIGUCHI H 3.00
## 16 ROTOLO T 5 BIELEFELD W 2.83
## 17 SARGEANT A 5 BOWMAN W 2.83
## 18 SAXTON GD 5 BROWN E 2.83
## 19 SUNDEEN RA 5 LUKSETICH W 2.83
## 20 BEKKERS R 4 ROTOLO T 2.83
##
##
## Top manuscripts per citations
##
## Paper
## 1 FROELICH KA,(1999),NONPROFIT VOLUNT. SECT. Q.
## 2 CNAAN RA;HANDY F;WADSWORTH M,(1996),NONPROFIT VOLUNT. SECT. Q.
## 3 YOUNG DR,(2000),NONPROFIT VOLUNT. SECT. Q.
## 4 CLARY EG;SNYDER M;STUKAS AA,(1996),NONPROFIT VOLUNT. SECT. Q.
## 5 EBRAHIM A,(2005),NONPROFIT VOLUNT. SECT. Q.
## 6 GUO C;ACAR M,(2005),NONPROFIT VOLUNT. SECT. Q.
## 7 WOLLEBAEK D;SELLE P,(2002),NONPROFIT VOLUNT. SECT. Q.
## 8 BORZAGA C;TORTIA E,(2006),NONPROFIT VOLUNT. SECT. Q.
## 9 DART R,(2004),NONPROFIT VOLUNT. SECT. Q.
## 10 HERMAN RD;RENZ DO,(1999),NONPROFIT VOLUNT. SECT. Q.
## 11 FORBES DP,(1998),NONPROFIT VOLUNT. SECT. Q.
## 12 GAZLEY B;BRUDNEY JL,(2007),NONPROFIT VOLUNT. SECT. Q.
## 13 SOWA JE;SELDEN SC;SANDFORT JR,(2004),NONPROFIT VOLUNT. SECT. Q.
## 14 BROWN E;FERRIS JM,(2007),NONPROFIT VOLUNT. SECT. Q.
## 15 HAGER MA;WILSON S;POLLAK TH;ROONEY PM,(2003),NONPROFIT VOLUNT. SECT. Q.
## 16 MILLER-MILLESEN JL,(2003),NONPROFIT VOLUNT. SECT. Q.
## 17 ALEXANDER J;NANK R;STIVERS C,(1999),NONPROFIT VOLUNT. SECT. Q.
## 18 MINKOFF DC,(2002),NONPROFIT VOLUNT. SECT. Q.
## 19 HERMAN RD;RENZ DO,(1997),NONPROFIT VOLUNT. SECT. Q.
## 20 GRONBJERG KA;PAARLBERG L,(2001),NONPROFIT VOLUNT. SECT. Q.
## TC TCperYear
## 1 234 12.32
## 2 194 8.82
## 3 188 10.44
## 4 178 8.09
## 5 159 12.23
## 6 144 11.08
## 7 144 9.00
## 8 139 11.58
## 9 139 9.93
## 10 130 6.84
## 11 130 6.50
## 12 125 11.36
## 13 118 8.43
## 14 115 10.45
## 15 111 7.40
## 16 107 7.13
## 17 102 5.37
## 18 100 6.25
## 19 100 4.76
## 20 99 5.82
##
##
## Most Productive Countries (of corresponding authors)
##
## Country Articles Freq SCP MCP
## 1 USA 584 0.65251 545 39
## 2 CANADA 45 0.05028 33 12
## 3 ENGLAND 44 0.04916 38 6
## 4 BELGIUM 22 0.02458 20 2
## 5 GERMANY 21 0.02346 18 3
## 6 NETHERLANDS 20 0.02235 13 7
## 7 GEORGIA 19 0.02123 18 1
## 8 ISRAEL 19 0.02123 16 3
## 9 SPAIN 17 0.01899 16 1
## 10 AUSTRALIA 12 0.01341 8 4
## 11 NORWAY 12 0.01341 10 2
## 12 SWITZERLAND 11 0.01229 9 2
## 13 ITALY 10 0.01117 10 0
## 14 SCOTLAND 8 0.00894 6 2
## 15 FRANCE 6 0.00670 5 1
## 16 KOREA 5 0.00559 2 3
## 17 AUSTRIA 3 0.00335 3 0
## 18 IRELAND 3 0.00335 2 1
## 19 SINGAPORE 3 0.00335 1 2
## 20 SWEDEN 3 0.00335 3 0
##
##
## SCP: Single Country Publications
##
## MCP: Multiple Country Publications
##
##
## Total Citations per Country
##
## Country Total Citations Average Article Citations
## 1 USA 10645 18.23
## 2 CANADA 939 20.87
## 3 ENGLAND 517 11.75
## 4 BELGIUM 376 17.09
## 5 GEORGIA 345 18.16
## 6 NORWAY 295 24.58
## 7 NETHERLANDS 280 14.00
## 8 ISRAEL 278 14.63
## 9 ITALY 254 25.40
## 10 GERMANY 182 8.67
## 11 SPAIN 168 9.88
## 12 AUSTRALIA 128 10.67
## 13 SWITZERLAND 82 7.45
## 14 FINLAND 62 31.00
## 15 SCOTLAND 50 6.25
## 16 CHINA 36 18.00
## 17 MEXICO 35 35.00
## 18 FRANCE 32 5.33
## 19 ROMANIA 24 24.00
## 20 DENMARK 18 9.00
##
##
## Most Relevant Sources
##
## Sources Articles
## 1 NONPROFIT AND VOLUNTARY SECTOR QUARTERLY 921
##
##
## Most Relevant Keywords
##
## Author Keywords (DE) Articles Keywords-Plus (ID) Articles
## 1 VOLUNTEERING 67 ORGANIZATIONS 125
## 2 NONPROFIT 54 PARTICIPATION 66
## 3 PHILANTHROPY 53 SECTOR 63
## 4 NONPROFIT ORGANIZATIONS 49 PERFORMANCE 53
## 5 ACCOUNTABILITY 30 WORK 49
## 6 CHARITABLE GIVING 30 BEHAVIOR 47
## 7 SOCIAL CAPITAL 30 IMPACT 40
## 8 CIVIL SOCIETY 27 GOVERNANCE 36
## 9 CIVIC ENGAGEMENT 22 NONPROFIT ORGANIZATIONS 30
## 10 FUNDRAISING 22 DONATIONS 29
## 11 GOVERNANCE 22 MODEL 29
## 12 NONPROFITS 22 ALTRUISM 26
## 13 ORGANIZATIONS 17 MANAGEMENT 26
## 14 ADVOCACY 16 SERVICE 26
## 15 MOTIVATION 16 STATE 26
## 16 SOCIAL 16 ACCOUNTABILITY 25
## 17 FOUNDATIONS 15 GOVERNMENT 24
## 18 GIVING 15 PERSPECTIVE 24
## 19 VOLUNTEERISM 15 UNITED-STATES 24
## 20 VOLUNTARY ASSOCIATIONS 14 DETERMINANTS 23
# Plot the descriptive analysis (k = 10) without pausing between figures.
plot(results, k = 10, pause = FALSE)
Create a co-citation network of authors.
# Author co-citation network.
# analysis must be "co-citation" to match the section heading and the plot
# title; "coupling" would build an author bibliographic-coupling network
# instead.
# Extract the cited-author field (CR_AU) explicitly so the co-citation matrix
# can be built; this only adds a column to M.
M <- metaTagExtraction(M, Field = "CR_AU", sep = ";")
NetMatrix <- biblioNetwork(
  M,
  analysis = "co-citation",
  network = "authors",
  sep = ";"
)
# Plot the 20 most connected authors; TRUE is spelled out because T is an
# ordinary variable that can be reassigned.
net <- networkPlot(
  NetMatrix,
  normalize = "salton",
  weighted = TRUE,
  n = 20,
  labelsize = 0.5,
  curved = TRUE,
  Title = "A Co-citation Network of Authors",
  type = "kamada",
  size = TRUE,
  remove.multiple = TRUE
)
Create a collaboration network of universities.
# Collaboration network of universities, drawn on a circle layout and
# limited to n = 20 nodes.
NetMatrix1 <- biblioNetwork(
  M,
  analysis = "collaboration",
  network = "universities",
  sep = ";"
)
# TRUE is spelled out (T can be reassigned) and `<-` is used for assignment.
net <- networkPlot(
  NetMatrix1,
  normalize = "salton",
  weighted = TRUE,
  n = 20,
  labelsize = 0.5,
  curved = TRUE,
  Title = "A Collaboration Network of Universities",
  type = "circle",
  size = TRUE,
  remove.multiple = TRUE
)
Create a collaboration network of countries.
# Collaboration network of countries.
# The AU_CO field is extracted first and stored back into M before the
# country network is built.
M <- metaTagExtraction(M, Field = "AU_CO", sep = ";")
NetMatrix2 <- biblioNetwork(
  M,
  analysis = "collaboration",
  network = "countries",
  sep = ";"
)
# TRUE is spelled out (T can be reassigned) and `<-` is used for assignment.
net <- networkPlot(
  NetMatrix2,
  normalize = "salton",
  weighted = TRUE,
  n = 30,
  labelsize = 0.5,
  curved = TRUE,
  Title = "A Collaboration Network of Countries",
  type = "sphere",
  size = TRUE,
  remove.multiple = TRUE
)
Here is the keyword co-occurrence network.
# Keyword co-occurrence network, limited to n = 30 nodes.
NetMatrix3 <- biblioNetwork(
  M,
  analysis = "co-occurrences",
  network = "keywords",
  sep = ";"
)
# TRUE is spelled out (T can be reassigned) and `<-` is used for assignment.
net <- networkPlot(
  NetMatrix3,
  normalize = "association",
  weighted = TRUE,
  n = 30,
  curved = TRUE,
  Title = "A Keyword Co-occurrence Network, based on the top 30",
  type = "kamada",
  size = TRUE,
  edgesize = 5,
  labelsize = 0.5
)
Now, apply Natural Language Processing and Topic Modeling to the abstracts of NVSQ publications. First, clean the text.
# Keep two columns of M: TI (article titles) and AB (abstracts), then
# convert both to lower case.
text_fields <- c("TI", "AB")
txt1 <- M[, text_fields]
txt1[text_fields] <- lapply(txt1[text_fields], tolower)
Create a corpus in which each abstract is a document.
# Build the corpus: the text of each document comes from AB and its
# document id from TI.
txt1_corpus <- corpus(
  txt1,
  docid_field = "TI",
  text_field = "AB"
)
# Print the corpus to inspect it.
txt1_corpus
## Corpus consisting of 921 documents and 0 docvars.
Tokenization. There are many ways to tokenize text (by sentence, by word, or by line). For our data, we tokenize by words.
# Tokenize the corpus, dropping punctuation along the way.
toks <- tokens(txt1_corpus, remove_punct = TRUE)
# Show the first 5 tokens of the second document.
head(toks[[2]], n = 5)
## [1] "nonprofit" "organizations" "that" "primarily"
## [5] "provide"
Remove stop words and a customized list of filter words.
# Drop English stop words first, then the custom filter words.
nostop_toks <- tokens_remove(toks, stopwords("en"))
nostop_toks <- tokens_remove(nostop_toks, c("abstract", "study", "the"))
Create n-gram.
# Build bigrams (n = 2) from the filtered tokens.
ngram <- tokens_ngrams(nostop_toks, n = 2)
# Show the first 5 bigrams of the first document.
head(ngram[[1]], n = 5)
## [1] "social_media" "media_era" "era_ushers"
## [4] "ushers_increasingly" "increasingly_noisy"
Create DTM (Document Term Matrix).
# Build the document-feature matrix from the filtered tokens and print it.
nostop_toks_dfm <- dfm(nostop_toks)
nostop_toks_dfm
## Document-feature matrix of: 921 documents, 8,686 features (99.3% sparse).
Create a semantic co-occurrence network.
# Keep only words that appear at least 50 times in the corpus.
# min_termfreq is the current name of this threshold in dfm_trim(); the
# older min_count spelling is deprecated.
new_dfm <- dfm_trim(nostop_toks_dfm, min_termfreq = 50)

# Build a feature co-occurrence matrix and restrict it to its top 100
# features.
new_fcm <- fcm(new_dfm)
feat <- names(topfeatures(new_fcm, 100))
new_fcm <- fcm_select(new_fcm, feat)

# Vertex sizes: log of each selected feature's total count, rescaled in the
# plot call so the largest vertex has size 3.
size <- log(colSums(dfm_select(new_dfm, feat)))
textplot_network(
  new_fcm,
  min_freq = 0.8,
  vertex_size = size / max(size) * 3
)
Get top 10 keywords (top features) in the DTM.
# List the 10 top features of the full document-feature matrix.
topfeatures(nostop_toks_dfm, n = 10)
## nonprofit organizations social article research
## 1000 903 570 562 439
## nonprofits organizational volunteering service data
## 366 348 341 331 327
Plot top keywords and produce a wordcloud of top keywords.
# Frequency table of the 25 top features of the trimmed dfm.
freq <- textstat_frequency(new_dfm, n = 25)

# Dot plot of the 15 top features, ordered by frequency, with flipped axes.
ggplot(
  textstat_frequency(new_dfm, n = 15),
  aes(x = reorder(feature, frequency), y = frequency)
) +
  geom_point() +
  coord_flip() +
  labs(x = NULL, y = "Frequency") +
  theme_minimal()

# Word cloud capped at 100 words.
textplot_wordcloud(new_dfm, max_words = 100)
Get tf–idf (Term Frequency-Inverse Document Frequency) and view the top keywords by tf-idf.
# Weight the document-feature matrix by tf-idf and list the 10 top features
# under that weighting.
tfidf_dfm <- dfm_tfidf(nostop_toks_dfm)
topfeatures(tfidf_dfm, n = 10)
## nonprofit social volunteering organizations nonprofits
## 341.0103 311.1963 300.5827 274.8221 269.5125
## service volunteer volunteers community organizational
## 247.1828 242.4899 230.0244 226.5400 219.8738
Try topic modeling.
# Convert the dfm to the format expected by the topicmodels package.
dtm <- convert(nostop_toks_dfm, to = "topicmodels")
# Fit an LDA model with 6 topics. The fit starts from a random
# initialisation, so a fixed seed is supplied to make the topics
# reproducible between runs. Output recorded from an earlier unseeded run
# may therefore differ from what this call produces.
lda <- LDA(dtm, k = 6, control = list(seed = 621))
# Show the top 10 words pertaining to each topic.
terms(lda, 10)
## Topic 1 Topic 2 Topic 3 Topic 4
## [1,] "nonprofit" "nonprofit" "organizations" "social"
## [2,] "organizations" "organizations" "nonprofit" "organizations"
## [3,] "article" "nonprofits" "social" "volunteer"
## [4,] "research" "social" "data" "organizational"
## [5,] "public" "volunteering" "service" "giving"
## [6,] "data" "sector" "voluntary" "nonprofit"
## [7,] "also" "community" "use" "also"
## [8,] "organizational" "research" "nonprofits" "factors"
## [9,] "nonprofits" "literature" "research" "public"
## [10,] "sector" "organization" "three" "volunteers"
## Topic 5 Topic 6
## [1,] "article" "service"
## [2,] "nonprofit" "social"
## [3,] "organizations" "community"
## [4,] "volunteering" "organizational"
## [5,] "social" "organizations"
## [6,] "voluntary" "authors"
## [7,] "data" "research"
## [8,] "using" "volunteering"
## [9,] "found" "funding"
## [10,] "information" "capital"
# Store the most likely topic of each document as the "topic" docvar.
docvars(nostop_toks_dfm, "topic") <- topics(lda)
# Show the topic allocation for the first 5 documents.
head(topics(lda), n = 5)
## speaking and being heard: how nonprofit advocacy organizations gain attention on social media
## 5
## service-providing nonprofits working in coalition to advocate for policy change
## 4
## representation and diversity, advocacy, and nonprofit arts organizations
## 4
## what regulation, who pays? public attitudes to charity regulation in england and wales
## 6
## do donors reduce bilateral aid to countries with restrictive ngo laws? a panel study, 1993-2012
## 3