Part 1

See Green OA and Academic Libraries for an explanation of the following.

Part 1

art1 <- read.csv("art1.csv")
# Save as new variables
ft2012 <- as.numeric(art1$fullText2012)
# Transform from 1s and 2s to 0s and 1s
ft2012 <- ft2012 - 1

# Log transform citation counts
ctLog2012 <- log(art1$citation2012 + 1)

# Remove NAs in ft2012Factor variable and corresponding NAs in ctLog2012
# variable
ctLog2012 <- ctLog2012[complete.cases(ft2012)]
ft2012 <- ft2012[complete.cases(ft2012)]

# Create data frame from variables
artData <- cbind(ft2012, ctLog2012)
artData <- as.data.frame(artData)

# Model the logistic regression
mylogit <- glm(ft2012 ~ ctLog2012, data = artData, family = "binomial")

summary(mylogit)

## 
## Call:
## glm(formula = ft2012 ~ ctLog2012, family = "binomial", data = artData)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.844  -1.209   0.821   1.029   1.501  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -0.7346     0.1960   -3.75  0.00018 ***
## ctLog2012     0.2930     0.0512    5.72    1e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 902.56  on 661  degrees of freedom
## Residual deviance: 867.08  on 660  degrees of freedom
## AIC: 871.1
## 
## Number of Fisher Scoring iterations: 4

exp(mylogit$coefficients)

## (Intercept)   ctLog2012 
##      0.4797      1.3404

exp(confint(mylogit))

## Waiting for profiling to be done...

##              2.5 % 97.5 %
## (Intercept) 0.3251 0.7017
## ctLog2012   1.2144 1.4846


# Plot regression
plot(artData$ctLog2012, artData$ft2012, xlab = "Citations 2012, Log Transformed", 
    ylab = "Probability of Full Text, 2012")

curve(predict(mylogit, data.frame(ctLog2012 = x), type = "resp"), add = TRUE, 
    col = "red")

# Plot with ggplot2
library(ggplot2)

plot of chunk unnamed-chunk-1

ggplot(artData, aes(ctLog2012, ft2012)) + geom_point() + geom_smooth(method = "glm", 
    family = "binomial") + xlab("Citations, 2012, Log Transformed") + ylab("Probability of Full Text, 2012") + 
    ggtitle("Logistic Regression, 2012 Data")

plot of chunk unnamed-chunk-1

Part 2

# Full Text Sources 2012
library(pander)


Attaching package: 'pander'

The following object is masked from 'package:knitr':

    pandoc

panderOptions("table.style", "rmarkdown")
fullTextSources_2012 <- read.csv("fullTextSources2012.csv")
fullTextSources_2012 <- as.data.frame(fullTextSources_2012)
pandoc.table(summary(fullTextSources_2012$Type), caption = "Number of entities providing full text articles")



|  activism  |  business  |  government  |  national  |  organization  |
|:----------:|:----------:|:------------:|:----------:|:--------------:|
|     1      |     10     |      4       |     5      |       1        |

Table: Number of entities providing full text articles (continued below)



|  personal  |  publisher  |  university  |  unknown  |
|:----------:|:-----------:|:------------:|:---------:|
|     9      |     46      |     145      |     8     |

pandoc.table(tapply(fullTextSources_2012$Count, fullTextSources_2012$Type, FUN = sum), 
    caption = "Number of documents provided by entities")



|  activism  |  business  |  government  |  national  |  organization  |
|:----------:|:----------:|:------------:|:----------:|:--------------:|
|     1      |     11     |      46      |     6      |       1        |

Table: Number of documents provided by entities (continued below)



|  personal  |  publisher  |  university  |  unknown  |
|:----------:|:-----------:|:------------:|:---------:|
|     9      |     100     |     199      |     9     |

ftS2012 <- as.data.frame(round(table(fullTextSources_2012$Count, fullTextSources_2012$Type)/sum(fullTextSources_2012$Count), 
    2))
pandoc.table(tapply(ftS2012$Freq, ftS2012$Var2, FUN = sum), caption = "Proportion of entities providing full text access")



|  activism  |  business  |  government  |  national  |  organization  |
|:----------:|:----------:|:------------:|:----------:|:--------------:|
|     0      |    0.02    |      0       |    0.01    |       0        |

Table: Proportion of entities providing full text access (continued below)



|  personal  |  publisher  |  university  |  unknown  |
|:----------:|:-----------:|:------------:|:---------:|
|    0.02    |    0.13     |     0.38     |   0.02    |

sum(ftS2012$Freq)  # out of the total proportion of OA articles in the sample

[1] 0.58