peter — May 10, 2014, 4:34 PM
# PRELIMINARIES
# NOTE(review): the working directory is a hard-coded, user-specific path.
# The project file opened later is given as a relative path, so it is
# resolved against this directory.
setwd("~/Documents/PhD/Analysis/IWA") # Set working directory
library(RQDA) # Load RQDA
Loading required package: RSQLite
Loading required package: DBI
Loading required package: gWidgetsRGtk2
Loading required package: RGtk2
Loading required package: gWidgets
# Open RQDA, then open the project file.
RQDA()
openProject("LiteratureReview.rqda", updateGUI = TRUE)

# DESCRIPTIVE STATISTICS
# Extract data
library(stringr)

# Case names (status 1 only), then the file-to-case links (status 1 only)
# joined to them so that every linked file row carries its case name.
caselist <- RQDAQuery("SELECT id, name FROM cases WHERE status==1")
cases <- merge(
  RQDAQuery("SELECT fid, caseid FROM caselinkage WHERE status==1"),
  caselist,
  by.x = "caseid",
  by.y = "id"
)

# Ensure only one case per file: link rows divided by the number of files.
paste("Cases per abstract:", nrow(cases) / length(getFiles()))
[1] "Cases per abstract: 1"
# File categories (used here as keywords) and the file-to-category links,
# both restricted to status 1, joined on the category id.
keywords <- RQDAQuery("SELECT catid, name FROM filecat WHERE status==1")
keys <- merge(
  keywords,
  RQDAQuery("SELECT fid, catid FROM treefile WHERE status==1"),
  by = "catid"
)

# Keywords per abstract: keyword links divided by the number of distinct files.
paste("Keywords per abstract", nrow(keys) / length(unique(keys$fid)))
[1] "Keywords per abstract 1.12745098039216"
# Build one row per abstract/keyword pair with the columns
# citation, journal, keyword and year.
abstracts <- RQDAQuery("SELECT id AS fid, name FROM source WHERE status==1")
# Both inputs have a `name` column, so this merge yields name.x (source name)
# and name.y (case name).
abstracts <- merge(abstracts, cases, by = "fid")
# Adds catid and the keyword's `name`.
abstracts <- merge(abstracts, keys, by = "fid")
# Year of publication: the last four characters of the source name.
abstracts$year <- as.numeric(str_sub(abstracts$name.x, -4))
# Select the wanted columns by name rather than dropping by position, so a
# change in the merge layout fails loudly instead of mislabelling columns.
abstracts <- abstracts[, c("name.x", "name.y", "name", "year")]
names(abstracts) <- c("citation", "journal", "keyword", "year")
# The intermediate tables are no longer needed.
rm(caselist, cases, keys)
# Distribution of keywords per journal, with row and column totals.
print(addmargins(table(abstracts$journal, abstracts$keyword)))
consumer customer marketing Sum
Journal of Hydroinformatics 1 2 0 3
Journal of Water and Health 11 0 2 13
JWSRT - Aqua 13 12 1 26
Water Intelligence Online 0 3 0 3
Water Policy 11 9 1 21
Water Practice and Technology 7 13 0 20
Water Science and Technology 42 19 4 65
WST: Water Supply 30 42 7 79
Sum 115 100 15 230
# Keyword-by-year counts; each row of this table is one stacked bar segment.
keyword_by_year <- table(abstracts$keyword, abstracts$year)
# One grey shade per keyword that actually occurs in the data.
kleur <- gray.colors(nrow(keyword_by_year))
par(mar = c(4, 4, 1, 1), cex = 1, lty = 0)
barplot(
  keyword_by_year,
  ylab = "Abstracts",
  xlab = "Year of Publication",
  col = kleur
)
# Take the legend labels from the table's own row names so that each label is
# paired with the colour of its bar segment. The order of `keywords$name`
# comes from the database query and need not match the table's row order.
legend("topleft", legend = rev(rownames(keyword_by_year)), fill = rev(kleur))

# Remove double keywords: keep one row per citation.
# (Not named `unique`, which would mask base::unique.)
unique_abstracts <- abstracts[!duplicated(abstracts$citation), ]
journals <- as.data.frame(table(unique_abstracts$journal))
journals <- journals[order(journals$Freq, decreasing = TRUE), ]
names(journals) <- c("journal", "abstracts")
# Wide bottom margin leaves room for the vertical journal names (las = 2).
par(mar = c(12, 4, 1, 1))
barplot(
  journals$abstracts,
  names.arg = journals$journal,
  las = 2,
  ylab = "Abstracts",
  cex.axis = .8
)
print(journals)
journal abstracts
8 WST: Water Supply 67
7 Water Science and Technology 59
3 JWSRT - Aqua 25
5 Water Policy 17
6 Water Practice and Technology 17
2 Journal of Water and Health 13
1 Journal of Hydroinformatics 3
4 Water Intelligence Online 3
# Cross-check: number of files reported by getFiles() next to the total of
# the per-journal abstract counts.
paste(
  "Number of abstracts: ",
  length(getFiles()),
  "(", sum(journals$abstracts), ")"
)
[1] "Number of abstracts: 204 ( 204 )"
# Cross-check: number of case ids reported by getCaseIds() next to the number
# of rows in the journal table.
paste(
  "Number of journals:",
  length(getCaseIds()),
  "(", nrow(journals), ")"
)
[1] "Number of journals: 8 ( 8 )"
# Write graph to disk
# The file name is spelled out (it is the jpeg() default) so the destination
# of the graph is visible in the script.
jpeg(filename = "Rplot%03d.jpeg", width = 1024, height = 768)
par(cex = 2, lty = 0)
# Keyword-by-year counts; each row of this table is one stacked bar segment.
keyword_counts <- table(abstracts$keyword, abstracts$year)
barplot(
  keyword_counts,
  ylab = "Abstracts",
  xlab = "Year of Publication",
  col = kleur
)
# Take the legend labels from the table's own row names so that each label is
# paired with the colour of its bar segment. The order of `keywords$name`
# comes from the database query and need not match the table's row order.
legend("topleft", legend = rev(rownames(keyword_counts)), fill = rev(kleur))
dev.off()
pdf
2