1 Titles and Abstracts

1.1 How many?

library(readxl)
library(DT)
library(dplyr)
library(reshape2)
library(data.table)
library(tidyr)

# NOTE(review): setwd() with an absolute, machine-specific path makes this
# script non-portable. It is kept because the relative read_excel() path
# below is resolved against it.
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/MY_Papers_V2/TRB 2021/000 Topics/XML")

# Read the "fin00" sheet of the workbook.
dat1 <- read_excel("BikeShare_5_062020.xlsx", sheet = "fin00")

# Distinct (title, abstract, year, serial, publisher) combinations.
dat2 <- dat1[, c("title", "abstract", "year", "serial", "publisher")]
dat2a <- dat2[!duplicated(dat2), ]

# Number of non-missing fields per record. Computed with rowSums() on the
# logical NA matrix rather than apply() over a data frame, and without the
# former hard-coded offset, so the column holds the real count. Only the
# per-title maximum is used below, so the selected rows are unchanged.
dat2a$non_na <- rowSums(!is.na(dat2a))

# For every title keep the record(s) with the most filled-in fields:
# aggregate() finds the maximum per title, merge() pulls the matching rows
# back in. Ties on the maximum are all retained.
dat2b <- aggregate(non_na ~ title, dat2a, max)
dat2c <- merge(dat2b, dat2a, by = c("title", "non_na"))
dim(dat2c)
## [1] 206   6

# Long table: one row per (title, variable, value) where variable is one of
# term / subject_area / author.
dat4 <- dat1[, c("title", "term", "subject_area", "author")]
dat4a <- dat4[!duplicated(dat4), ]
# Call reshape2::melt explicitly: with data.table attached after reshape2,
# a bare melt() hits data.table's generic, which only redirects tibbles to
# reshape2 under a deprecation warning.
dat5 <- reshape2::melt(dat4a, id.vars = "title")
dat6 <- dat5[complete.cases(dat5), ]
# rowind numbers the values within each (title, variable) in row order.
dat7 <- dat6 %>%
  group_by(title, variable) %>%
  mutate(rowind = row_number())

# Distinct, complete (title, year) pairs used to attach a year to each row.
dat2e <- dat1[, c("title", "year")]
dat2e <- unique(dat2e[complete.cases(dat2e), ])
dat8 <- left_join(dat7, dat2e, by = "title")

1.2 By Key Words

# Counts of each value per publication year (years after 1990 only).
# NOTE(review): dat8 is not filtered by `variable` here, so author rows are
# counted together with term / subject_area rows - confirm this is intended
# for a "key words" table.
dat8a <- subset(dat8, year > 1990)

# Wide table: one row per value, one column per year; combinations that do
# not occur are filled with 0. `.groups = "drop"` returns an ungrouped
# tibble and silences the summarise() regrouping message.
dat9 <- dat8a %>%
  group_by(value, year) %>%
  summarise(count = n(), .groups = "drop") %>%
  spread(year, count, fill = 0)

# Row total over every year column. Selecting the columns by name instead
# of the fixed positions 2:6 keeps the total correct for any number of
# distinct years.
dat9$Total <- rowSums(dat9[, setdiff(names(dat9), "value")])

datatable(dat9, extensions = c("Scroller", "FixedColumns"), options = list(
  deferRender = TRUE,
  scrollY = TRUE,
  scrollX = TRUE,
  fixedColumns = list(leftColumns = 2, rightColumns = 1)
))

1.3 By Author Numbering (First, …)

# Restrict the long table to author rows.
dat8a <- subset(dat8, variable == "author")

# Tally how often each author appears at each position (rowind), then put
# the positions in columns. summarise() leaves the result grouped by
# `value` and reports that with a regrouping message.
author_position_counts <- dat8a %>%
  group_by(value, rowind) %>%
  summarise(count = n())
dat9 <- spread(author_position_counts, key = rowind, value = count)

# Author/position combinations that never occur count as zero.
dat9 <- replace(dat9, is.na(dat9), 0)

datatable(
  dat9,
  extensions = c("Scroller", "FixedColumns"),
  options = list(
    deferRender = TRUE,
    scrollY = TRUE,
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2, rightColumns = 1)
  )
)

1.4 By Author Year

# Author rows published after 1990. subset() drops rows where the condition
# is NA, so rows without a year are excluded as well.
dat8a <- subset(dat8, variable == "author" & year > 1990)

# Tally publications per author and year, then put the years in columns.
# summarise() leaves the result grouped by `value` and reports that with a
# regrouping message.
author_year_counts <- dat8a %>%
  group_by(value, year) %>%
  summarise(count = n())
dat9 <- spread(author_year_counts, key = year, value = count)

# Author/year combinations that never occur count as zero.
dat9 <- replace(dat9, is.na(dat9), 0)

datatable(
  dat9,
  extensions = c("Scroller", "FixedColumns"),
  options = list(
    deferRender = TRUE,
    scrollY = TRUE,
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2, rightColumns = 1)
  )
)

1.5 By Journal

# Distinct (title, serial) pairs in long form.
dat4 <- dat1[, c("title", "serial")]
dat4a <- dat4[!duplicated(dat4), ]
# Call reshape2::melt explicitly: with data.table attached after reshape2,
# a bare melt() hits data.table's generic, which only redirects tibbles to
# reshape2 under a deprecation warning announced to become an error.
dat5 <- reshape2::melt(dat4a, id.vars = "title")
dat6 <- dat5[complete.cases(dat5), ]
dat7 <- dat6 %>%
  group_by(title, variable) %>%
  mutate(rowind = row_number())

# Distinct, complete (title, year) pairs used to attach a year to each row.
dat2e <- dat1[, c("title", "year")]
dat2e <- unique(dat2e[complete.cases(dat2e), ])
dat8 <- left_join(dat7, dat2e, by = "title")

# The melted `value` column holds the serial name; rename it by name rather
# than by position.
names(dat8)[names(dat8) == "value"] <- "serial"

# Wide table: one row per serial, one column per year; combinations that do
# not occur are filled with 0. Titles without a year end up in an NA-named
# column because no year filter is applied in this section.
dat9 <- dat8 %>%
  group_by(serial, year) %>%
  summarise(count = n(), .groups = "drop") %>%
  spread(year, count, fill = 0)

# Row total over every year column. Selecting the columns by name instead
# of the fixed positions 2:6 keeps the total correct for any number of
# distinct years.
dat9$Total <- rowSums(dat9[, setdiff(names(dat9), "serial")])

datatable(dat9, extensions = c("Scroller", "FixedColumns"), options = list(
  deferRender = TRUE,
  scrollY = TRUE,
  scrollX = TRUE,
  fixedColumns = list(leftColumns = 2, rightColumns = 1)
))

1.6 Read the titles

# Distinct (title, author) pairs in long form.
dat4 <- dat1[, c("title", "author")]
dat4a <- dat4[!duplicated(dat4), ]
# Call reshape2::melt explicitly: with data.table attached after reshape2,
# a bare melt() hits data.table's generic, which only redirects tibbles to
# reshape2 under a deprecation warning announced to become an error.
dat5 <- reshape2::melt(dat4a, id.vars = "title")
dat6 <- dat5[complete.cases(dat5), ]
dat7 <- dat6 %>%
  group_by(title, variable) %>%
  mutate(rowind = row_number())

# Keep the first non-missing author row of every title (in row order).
dat7a <- subset(dat7, rowind == 1)

# Distinct, complete (title, serial, year) triples to attach to each title.
dat2e <- dat1[, c("title", "serial", "year")]
dat2e <- unique(dat2e[complete.cases(dat2e), ])
dat8 <- left_join(dat7a, dat2e, by = "title")

# The melted `value` column holds the author name; rename it by name rather
# than by position.
names(dat8)[names(dat8) == "value"] <- "first_author"

# Selectable, exportable table of year / title / first author / serial.
datatable(
  dat8[, c("year", "title", "first_author", "serial")],
  extensions = c("Select", "Buttons"),
  options = list(
    select = list(style = "os", items = "row"),
    dom = "Blfrtip",
    rowId = 0,
    buttons = c("selectRows", "csv", "excel")
  ),
  selection = "none"
)

1.7 Read the abstracts

# Selectable, exportable table built from columns 1 and 3-5 of dat2c
# (title, abstract, year, serial).
abstract_columns <- c("title", "abstract", "year", "serial")
datatable(
  dat2c[, abstract_columns],
  extensions = c("Select", "Buttons"),
  options = list(
    select = list(style = "os", items = "row"),
    dom = "Blfrtip",
    rowId = 0,
    buttons = c("selectRows", "csv", "excel")
  ),
  selection = "none"
)