# Data are from http://www.journalprices.com
# Packages used by this section.
library("gdata")     # read.xls()
library("reshape2")  # melt()
library("ggplot2")

# Read the 2013 summary workbook straight from the web.
j_df <- read.xls(
  "http://www.journalprices.com/2013_FinalSummaryForWeb.xlsx",
  stringsAsFactors = FALSE
)
# str(j_df)
# View(j_df)

# Flatten three spreadsheet rows into plain vectors (one element per column).
# NOTE(review): rows 2, 51 and 52 are hard-coded positions in this specific
# workbook -- confirm them if the source file is ever updated.
# `mean_price_article` is overwritten with the rounded overall mean at the
# end of this section.
mean_price_article <- unname(unlist(j_df[51, , drop = TRUE]))
mean_price_citation <- unname(unlist(j_df[52, , drop = TRUE]))
field <- unname(unlist(j_df[2, , drop = TRUE]))

# check.names = FALSE keeps the human-readable column names intact, so the
# melted `variable` levels are already the facet labels we want.
# stringsAsFactors = FALSE keeps the values as text so that as.numeric()
# below parses the numbers instead of returning factor codes (R < 4.0).
ratios <- data.frame(
  field = field,
  `Mean price per article` = mean_price_article,
  `Mean price per citation` = mean_price_citation,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Drop the first and last entries.
# NOTE(review): presumably the label column and a trailing summary column --
# verify against the workbook.
ratios <- ratios[-c(1, nrow(ratios)), ]

# Long format: one row per (subject area, measure).
ratios_m <- melt(ratios, id.vars = "field")
ratios_m$value <- as.numeric(ratios_m$value)
names(ratios_m) <- c("Subject Area", "variable", "Ratio")

# Reorder subject area by ratio. Each subject area occurs once per measure,
# so the ordered vector contains duplicates; factor() rejects duplicated
# levels, hence unique() (which keeps the first, i.e. highest-ratio,
# occurrence of each area).
area_order <- unique(ratios_m$`Subject Area`[order(-ratios_m$Ratio)])
ratios_m$`Subject Area` <- factor(ratios_m$`Subject Area`, levels = area_order)

ggplot(ratios_m, aes(`Subject Area`, Ratio)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  facet_wrap(~ variable, nrow = 2) +
  theme_bw(base_size = 14) +
  xlab("") +
  ggtitle("Price ratios for For-profit to Non-profit journals")

# how much more?
article_ratio <- ratios_m$Ratio[ratios_m$variable == "Mean price per article"]
range_price_article <- round(range(article_ratio, na.rm = TRUE), 1)
mean_price_article <- round(mean(article_ratio, na.rm = TRUE), 1)
# On average, articles in for-profit journals are 4.7 times as expensive as articles in non-profit journals (range across subject areas: 2.2 to 19.1).
# get data from the web
j2_df <- read.xls(
  "http://journalprices.com/SummaryData.xls",
  stringsAsFactors = FALSE
)
# Inspect it
# str(j2_df)
# View(j2_df)

# clean a little: fold the "Non-profit" spelling into "Non-Profit"
j2_df$PROFIT.STATUS[j2_df$PROFIT.STATUS %in% "Non-profit"] <- "Non-Profit"

# exclude journal titles that we don't know the profit status of.
# The is.na() guard matters: indexing rows with an NA logical would insert
# all-NA rows and inflate the journal count below.
status_known <- !is.na(j2_df$PROFIT.STATUS) & j2_df$PROFIT.STATUS != "Unknown"
j2_df <- j2_df[status_known, ]

# how many journals do we have?
n_journals <- nrow(j2_df)
# really that many unique titles?
identical(length(unique(j2_df$TITLE)), n_journals)
## [1] TRUE

# prepare the plot title
title <- paste0(
  "Article costs in ", n_journals,
  " scholarly journals. \nData from http://www.journalprices.com/"
)

# compute stats to use in x-axis tick labels
price_non_profit <- j2_df$PRICE.PER.ARTICLE[j2_df$PROFIT.STATUS == "Non-Profit"]
price_for_profit <- j2_df$PRICE.PER.ARTICLE[j2_df$PROFIT.STATUS == "For-Profit"]
non_profit_cost_mean <- round(mean(price_non_profit, na.rm = TRUE), 2)
for_profit_cost_mean <- round(mean(price_for_profit, na.rm = TRUE), 2)
non_profit_cost_median <- round(median(price_non_profit, na.rm = TRUE), 2)
for_profit_cost_median <- round(median(price_for_profit, na.rm = TRUE), 2)

# Tick labels keyed by status value, so each label is matched to its own
# category rather than relying on the alphabetical order of the axis.
status_labels <- c(
  "For-Profit" = paste0(
    "For-Profit Journals\nmean = $", for_profit_cost_mean,
    "\nmedian = $", for_profit_cost_median
  ),
  "Non-Profit" = paste0(
    "Non-Profit Journals\nmean = $", non_profit_cost_mean,
    "\nmedian = $", non_profit_cost_median
  )
)

# draw plot
library(scales)
library(ggplot2)
# Note: ylim(0, 200) removes journals priced outside that window from the
# drawing, while the mean/median in the tick labels are computed on all
# journals.
cost_plot <- ggplot(j2_df, aes(PROFIT.STATUS, PRICE.PER.ARTICLE)) +
  geom_jitter(alpha = 0.35) +
  ylab("Price per article (USD)") +
  ggtitle(title) +
  ylim(0, 200) +
  theme_bw(base_size = 16) +
  scale_x_discrete("", labels = status_labels)
cost_plot
# Save this exact plot object instead of whatever plot happened to be last.
ggsave("costs.png", plot = cost_plot, dpi = 600)

# Price per article vs. price per citation on log-log axes, coloured by
# profit status.
# NOTE(review): `text = TITLE` is not an aesthetic geom_point() draws;
# presumably it is kept for an interactive tooltip layer -- confirm.
ggjournal <- ggplot(
  j2_df,
  aes(PRICE.PER.ARTICLE, PRICE.PER.CITATION,
      colour = PROFIT.STATUS, text = TITLE)
) +
  geom_point(alpha = 0.35) +
  scale_x_log10() +
  scale_y_log10() +
  theme_bw(base_size = 16)
ggjournal