Scholarly journal pricing: For-Profit vs Non-Profit

Data are from http://www.journalprices.com

library("gdata")

# Fetch the 2013 summary workbook from journalprices.com. Text columns are kept
# as character rather than being converted to factors.
j_df <- read.xls(
  "http://www.journalprices.com/2013_FinalSummaryForWeb.xlsx",
  stringsAsFactors = FALSE
)

# str(j_df)
# View(j_df)

# The quantities used below are laid out across single spreadsheet rows, one
# cell per column. Flatten rows 2, 51 and 52 into plain, unnamed vectors.
field <- unlist(j_df[2, , drop = TRUE], use.names = FALSE)
mean_price_article <- unlist(j_df[51, , drop = TRUE], use.names = FALSE)
mean_price_citation <- unlist(j_df[52, , drop = TRUE], use.names = FALSE)

# data.frame() passes column names through make.names(), so the two value
# columns end up as Mean.price.per.article and Mean.price.per.citation.
ratios <- data.frame(
  field = field,
  Mean.price.per.article = mean_price_article,
  Mean.price.per.citation = mean_price_citation
)

# Discard the first and the last entry; only the rows in between are kept.
ratios <- ratios[-c(1, nrow(ratios)), ]

library("reshape2")

# Reshape to long format: one row per (subject area, measure) pair, with the
# measure name in `variable` and the cell content in `value`.
ratios_m <- melt(ratios, id = "field")

# Coerce the cell contents to numbers; anything that does not parse becomes NA
# (with a warning).
ratios_m$value <- as.numeric(ratios_m$value)
names(ratios_m) <- c("Subject Area", "variable", "Ratio")

# Replace the column-derived measure names with readable facet labels.
levels(ratios_m$variable) <- c(
  "Mean price per article",
  "Mean price per citation"
)

# reorder subject area by ratio
# After melting, every subject area occurs once per measure, so the vector of
# names sorted by ratio contains each name more than once. factor() requires
# distinct levels (duplicates are an error in current R and were a deprecation
# warning in old releases), hence unique(), which keeps the first - i.e. the
# highest-ratio - occurrence of each name.
ratios_m$`Subject Area` <- factor(
  ratios_m$`Subject Area`,
  levels = unique(
    as.character(ratios_m$`Subject Area`[order(-ratios_m$Ratio)])
  )
)

library("ggplot2")

# Horizontal bar chart of the ratios: one bar per subject area, with each of
# the two measures drawn in its own facet row.
ggplot(ratios_m, aes(x = `Subject Area`, y = Ratio)) +
  geom_bar(stat = "identity") +
  facet_wrap(~variable, nrow = 2) +
  coord_flip() +
  labs(x = "", title = "Price ratios for For-profit to Non-profit journals") +
  theme_bw(base_size = 14)

# how much more?
# Smallest and largest per-article ratio across subject areas, to one decimal.
range_price_article <- round(
  range(ratios_m$Ratio[ratios_m$variable == "Mean price per article"]),
  digits = 1
)

# Average per-article ratio, to one decimal. Note: this reuses the name
# mean_price_article, replacing the raw spreadsheet row stored under it
# earlier in the script.
mean_price_article <- round(
  mean(ratios_m$Ratio[ratios_m$variable == "Mean price per article"]),
  digits = 1
)

Articles in for-profit journals are on average 4.7 times more expensive than articles in non-profit journals (range: 2.2 to 19.1 times)

# get data from the web
j2_df <- read.xls(
  "http://journalprices.com/SummaryData.xls",
  stringsAsFactors = FALSE
)

# Inspect it
# str(j2_df)
# View(j2_df)

# clean a little...
# Fold the alternative spelling "Non-profit" into "Non-Profit". %in% never
# yields NA, so a missing status cannot end up in the assignment index.
j2_df$PROFIT.STATUS[j2_df$PROFIT.STATUS %in% "Non-profit"] <- "Non-Profit"

# exclude journal titles that we don't know the profit status of
# A bare `!= "Unknown"` evaluates to NA for a missing status, and an NA in a
# logical row index yields an all-NA row that would be counted as a journal.
# A missing status is therefore treated as unknown and dropped as well.
j2_df <- j2_df[
  !is.na(j2_df$PROFIT.STATUS) & j2_df$PROFIT.STATUS != "Unknown",
]

# how many journals do we have?
n_journals <- nrow(j2_df)
# really that many unique titles?
# (TRUE exactly when no title occurs more than once)
anyDuplicated(j2_df$TITLE) == 0L
## [1] TRUE
# prepare the plot title
title <- paste0(
  "Article costs in ", n_journals,
  " scholarly journals. \nData from http://www.journalprices.com/"
)

# compute stats to use in x-axis tick labels
# Mean and median price per article within each profit-status group, ignoring
# missing prices and rounded to two decimals.
for_profit_cost_mean <- round(
  mean(
    j2_df$PRICE.PER.ARTICLE[j2_df$PROFIT.STATUS == "For-Profit"],
    na.rm = TRUE
  ),
  digits = 2
)

non_profit_cost_mean <- round(
  mean(
    j2_df$PRICE.PER.ARTICLE[j2_df$PROFIT.STATUS == "Non-Profit"],
    na.rm = TRUE
  ),
  digits = 2
)

for_profit_cost_median <- round(
  median(
    j2_df$PRICE.PER.ARTICLE[j2_df$PROFIT.STATUS == "For-Profit"],
    na.rm = TRUE
  ),
  digits = 2
)

non_profit_cost_median <- round(
  median(
    j2_df$PRICE.PER.ARTICLE[j2_df$PROFIT.STATUS == "Non-Profit"],
    na.rm = TRUE
  ),
  digits = 2
)

# draw plot
library(scales) 
library(ggplot2)

# Jittered strip plot of price per article for each profit status.
# The tick labels are keyed by the PROFIT.STATUS value they describe, so each
# label is matched to its own category rather than depending on the categories
# sorting alphabetically and there being exactly two of them.
ggplot(j2_df, aes(PROFIT.STATUS, PRICE.PER.ARTICLE)) +
  geom_jitter(alpha = 0.35) +
  ylab("Price per article (USD)") +
  ggtitle(title) +
  # ylim() drops observations outside 0-200 from the drawing; the means and
  # medians in the tick labels were computed from all journals.
  ylim(0, 200) +
  theme_bw(base_size = 16) +
  scale_x_discrete(
    "",
    labels = c(
      "For-Profit" = paste0(
        "For-Profit Journals\nmean = $", for_profit_cost_mean,
        "\nmedian = $", for_profit_cost_median
      ),
      "Non-Profit" = paste0(
        "Non-Profit Journals\nmean = $", non_profit_cost_mean,
        "\nmedian = $", non_profit_cost_median
      )
    )
  )

# No plot object is passed, so ggsave() writes the last plot displayed.
ggsave("costs.png", dpi = 600)
# Scatter of price per citation against price per article on log-log axes,
# coloured by profit status. The `text` aesthetic carries the journal title;
# no layer here draws it (presumably meant for an interactive tooltip -
# TODO confirm).
ggjournal <- ggplot(
  j2_df,
  aes(
    x = PRICE.PER.ARTICLE,
    y = PRICE.PER.CITATION,
    colour = PROFIT.STATUS,
    text = TITLE
  )
) +
  geom_point(alpha = 0.35) +
  scale_y_log10() +
  scale_x_log10() +
  theme_bw(base_size = 16)

# Display the plot.
ggjournal