LiteratureReview_stats.R

peter — May 10, 2014, 4:34 PM

# PRELIMINARIES ----
# Work from the analysis folder; the project file opened later in this
# script is referenced by a relative path.
setwd("~/Documents/PhD/Analysis/IWA")
# Attach RQDA
library(RQDA)
Loading required package: RSQLite
Loading required package: DBI
Loading required package: gWidgetsRGtk2
Loading required package: RGtk2
Loading required package: gWidgets
# Start RQDA, then open the literature-review project
RQDA()
openProject(
  "LiteratureReview.rqda",
  updateGUI = TRUE
)

# DESCRIPTIVE STATISTICS ----
# Extract data
library(stringr)

# File-to-case links and case id/name pairs (rows with status 1 only)
cases <- RQDAQuery("SELECT fid, caseid FROM caselinkage WHERE status==1")
caselist <- RQDAQuery("SELECT id, name FROM cases WHERE status==1")

# Attach the case name to every file-to-case link
cases <- merge(x = cases, y = caselist, by.x = "caseid", by.y = "id")

# Sanity check: the ratio is 1 when every file is linked to exactly one case
paste("Cases per abstract:", nrow(cases) / length(getFiles()))
[1] "Cases per abstract: 1"

# Keyword (file category) names and the file-to-category links
keywords <- RQDAQuery("SELECT catid, name FROM filecat WHERE status==1")
keys <- RQDAQuery("SELECT fid, catid FROM treefile WHERE status==1")

# Attach the keyword name to every file-to-category link
keys <- merge(x = keywords, y = keys, by = "catid")

# Links per distinct file, i.e. average number of keywords per abstract
paste("Keywords per abstract", nrow(keys) / length(unique(keys$fid)))
[1] "Keywords per abstract 1.12745098039216"

# Build one row per (abstract, keyword) pair with the journal name attached.
# `abstracts` and `cases` both carry a `name` column, so the first merge
# produces `name.x` (source name) and `name.y` (case name); the keyword name
# from `keys` then arrives as plain `name`.
abstracts <- RQDAQuery("SELECT id AS fid, name FROM source WHERE status==1")
abstracts <- merge(abstracts, cases, by = "fid")
abstracts <- merge(abstracts, keys, by = "fid")

# Year of publication: the last four characters of the source name
abstracts$year <- as.numeric(str_sub(abstracts$name.x, -4))

# Keep the descriptive columns by name instead of dropping ids by position,
# so a change in the merged column order cannot silently select wrong columns.
abstracts <- abstracts[, c("name.x", "name.y", "name", "year")]
names(abstracts) <- c("citation", "journal", "keyword", "year")

# The intermediate tables are no longer needed
rm(caselist, cases, keys)

# Distribution of keywords per journal, with row and column totals
print(addmargins(table(abstracts$journal, abstracts$keyword)))

                                consumer customer marketing Sum
  Journal of Hydroinformatics          1        2         0   3
  Journal of Water and Health         11        0         2  13
  JWSRT - Aqua                        13       12         1  26
  Water Intelligence Online            0        3         0   3
  Water Policy                        11        9         1  21
  Water Practice and Technology        7       13         0  20
  Water Science and Technology        42       19         4  65
  WST: Water Supply                   30       42         7  79
  Sum                                115      100        15 230
# Stacked bar chart: abstracts per publication year, split by keyword.
keyword_by_year <- table(abstracts$keyword, abstracts$year)

# One grey shade per keyword present in the table
kleur <- gray.colors(nrow(keyword_by_year))

par(mar = c(4, 4, 1, 1), cex = 1, lty = 0)
barplot(
  keyword_by_year,
  ylab = "Abstracts",
  xlab = "Year of Publication",
  col = kleur
)

# Label the legend from the table's own row names: barplot() assigns the
# colours in table row order, which need not match the row order (or the
# number of rows) of the `keywords` query result. Labels and fills are
# reversed together so each label keeps its colour.
legend("topleft", legend = rev(rownames(keyword_by_year)), fill = rev(kleur))

plot of chunk unnamed-chunk-1


# One row per abstract: drop the extra rows created by multiple keywords.
# Stored as `unique_abstracts` so that base::unique() is not masked.
unique_abstracts <- abstracts[!duplicated(abstracts$citation), ]

# Abstracts per journal, most frequent first
journals <- as.data.frame(table(unique_abstracts$journal))
journals <- journals[order(journals$Freq, decreasing = TRUE), ]
names(journals) <- c("journal", "abstracts")

# Large bottom margin to leave room for the perpendicular (las = 2) labels
par(mar = c(12, 4, 1, 1))
barplot(
  journals$abstracts,
  names.arg = journals$journal,
  las = 2,
  ylab = "Abstracts",
  cex.axis = .8
)

plot of chunk unnamed-chunk-1

# Show the per-journal abstract counts, in the sorted order of `journals`
print(journals)
                        journal abstracts
8             WST: Water Supply        67
7  Water Science and Technology        59
3                  JWSRT - Aqua        25
5                  Water Policy        17
6 Water Practice and Technology        17
2   Journal of Water and Health        13
1   Journal of Hydroinformatics         3
4     Water Intelligence Online         3
# Cross-check: file count from RQDA vs. the sum of the per-journal counts
paste(
  "Number of abstracts: ",
  length(getFiles()),
  "(", sum(journals$abstracts), ")"
)
[1] "Number of abstracts:  204 ( 204 )"
# Cross-check: case count from RQDA vs. the number of rows in `journals`
paste(
  "Number of journals:",
  length(getCaseIds()),
  "(", nrow(journals), ")"
)
[1] "Number of journals: 8 ( 8 )"

# Write graph to disk
# No file name is passed, so jpeg() writes to its default file name
# ("Rplot%03d.jpeg") in the current working directory.
jpeg(width = 1024, height = 768)
par(cex = 2, lty = 0)

keyword_by_year <- table(abstracts$keyword, abstracts$year)
barplot(
  keyword_by_year,
  ylab = "Abstracts",
  xlab = "Year of Publication",
  col = kleur
)

# Legend labels come from the table's row names because barplot() assigns
# colours in table row order, which need not match the row order of the
# `keywords` query result. Labels and fills are reversed together.
legend("topleft", legend = rev(rownames(keyword_by_year)), fill = rev(kleur))

# Close the device so the file is flushed to disk
dev.off()
pdf 
  2