library(ggplot2)
# Preprocessing for density chart
# Two CSV exports are read, trimmed to the same three columns, stacked, and
# given short column names (Links / Citations / Year) for plotting.
raw1 <- read.csv("JMoSp_noLink_ready_for_processing.csv")
# Inspect the structure of the first file (recorded output follows).
str(raw1)
## 'data.frame': 1529 obs. of 12 variables:
## $ Column : int 0 1 2 3 4 5 6 7 8 9 ...
## $ article : Factor w/ 1529 levels "S0022285203002546.xml",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Non_self_referencial_link : Factor w/ 1 level "False": 1 1 1 1 1 1 1 1 1 1 ...
## $ volume : int 223 223 223 223 223 223 223 223 223 223 ...
## $ pre_link_text : Factor w/ 1529 levels " (FTIR)Journal of Molecular Spectroscopy 257 (2009) 187-199. doi:10.1016j.jms.2009.08.004journalJournal of Molecular Spectrosco"| __truncated__,..: 398 679 989 70 986 622 79 748 1212 374 ...
## $ link_text : Factor w/ 1529 levels "http://dx.doi.org/10.1016/j.jms.2003.08.008doi:10.1016/j.jms.2003.08.008JournalsS300.1",..: 3 1 2 5 4 9 8 7 12 6 ...
## $ clean_link_text_not_article: logi NA NA NA NA NA NA ...
## $ DOI_for_article : Factor w/ 1529 levels "10.1016/j.jms.2003.08.008",..: 3 1 2 5 4 9 8 7 12 6 ...
## $ linkDOI : Factor w/ 1529 levels "https://dx.doi.org/10.1016/j.jms.2003.08.008",..: 3 1 2 5 4 9 8 7 12 6 ...
## $ citationCount : int 2 9 6 2 6 21 27 10 12 0 ...
## $ Date : Factor w/ 122 levels "2004-01-00","2004-02-00",..: 1 2 1 2 1 1 1 1 1 1 ...
## $ dateYear : int 2004 2004 2004 2004 2004 2004 2004 2004 2004 2004 ...
colnames(raw1)
## [1] "Column" "article"
## [3] "Non_self_referencial_link" "volume"
## [5] "pre_link_text" "link_text"
## [7] "clean_link_text_not_article" "DOI_for_article"
## [9] "linkDOI" "citationCount"
## [11] "Date" "dateYear"

# Columns carried over from the first file: link flag, citation count, year.
keeps1 <- c("Non_self_referencial_link", "citationCount", "dateYear")
link_data1 <- raw1[, keeps1, drop = FALSE]

# Second file: the same three columns are selected.
# NOTE(review): its full schema is not shown here; it must at least contain
# the columns named in keeps2 (and those used for the bar plot later).
raw2 <- read.csv("JMoSp_records_ready_for_processing.csv")
keeps2 <- c("Non_self_referencial_link", "citationCount", "dateYear")
link_data2 <- raw2[, keeps2, drop = FALSE]

# Stack both subsets row-wise and apply the plotting-friendly column names.
link_data <- setNames(
  rbind(link_data1, link_data2),
  c("Links", "Citations", "Year")
)
# Preprocessing for histogram
# Take the row id, link status and year from the second file, rename them,
# and turn the id and the year into factors (Year becomes a discrete axis).
keeps3 <- c("Column", "StatusOverview", "dateYear")
chart2 <- setNames(
  raw2[, keeps3, drop = FALSE],
  c("number", "Status", "Year")
)
factor_cols <- c("number", "Year")
chart2[factor_cols] <- lapply(chart2[factor_cols], factor)
# Inspect the result (recorded output follows).
str(chart2)
## 'data.frame': 1634 obs. of 3 variables:
## $ number: Factor w/ 1634 levels "0","1","2","3",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Status: Factor w/ 2 levels "ACTIVE","BROKEN": 1 1 1 1 2 2 1 1 1 2 ...
## $ Year : Factor w/ 11 levels "2004","2005",..: 1 1 1 1 1 1 1 1 1 1 ...
# Plotting
# Density plots
# Distribution of citation counts, filled by link category, with one facet
# row per year. The plot is assembled layer by layer and then displayed.
link_data[["Links"]] <- factor(link_data[["Links"]])
l <- ggplot(data = link_data, mapping = aes(x = Citations, fill = Links))
# Semi-transparent fills so overlapping densities stay visible.
l <- l + geom_density(alpha = 0.3)
l <- l + facet_grid(Year ~ .)
# Fill colours are assigned to the levels of Links in level order.
l <- l + scale_fill_manual(values = c("green", "blue"))
l
# Bar plot
# Number of records per year, with one bar per Status value side by side.
ggplot(data = chart2, mapping = aes(x = Year, fill = Status)) +
  geom_bar(position = "dodge")