1 Titles and Abstracts

1.1 How many?

library(readxl)
library(DT)
library(dplyr)
library(reshape2)
library(data.table)
library(tidyr)

# Point the session at the project folder that holds the workbook, then load
# the "fin00" sheet; every later section reads from dat1.
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/MY_Papers_V2/TRB 2021/000 Topics/XML")
dat1 <- read_excel("BikeShare_5_062020.xlsx", sheet = "fin00")

# Keep the bibliographic fields only and drop rows that are exact repeats.
dat2 <- dat1[, c("title", "abstract", "year", "serial", "publisher")]
dat2a <- dat2[!duplicated(dat2), ]

# Completeness score per row: 3 minus the number of missing fields. Only the
# ordering of the score within a title is used below.
# NOTE(review): five fields are selected, so the score runs from -2 to 3; the
# constant 3 may be a leftover from a three-column version -- confirm.
dat2a$non_na <- 3 - rowSums(is.na(dat2a))

# Highest score reached by each title.
dat2b <- aggregate(non_na ~ title, data = dat2a, FUN = max)
# Merging on the shared columns (title, non_na) keeps only the rows that reach
# their title's highest score.
dat2c <- merge(dat2b, dat2a)
dim(dat2c)

## [1] 206   6

# Build a long (title, variable, value) table from the term, subject_area and
# author fields, index repeated values within each title/field, and attach the
# publication year.
dat4 <- dat1[, c("title", "term", "subject_area", "author")]
dat4a <- dat4[!duplicated(dat4), ]

# Call reshape2's melt explicitly: data.table is attached after reshape2 and
# masks melt(); its redirect for non-data.table input is deprecated and is
# announced to become an error in a later data.table release.
dat5 <- reshape2::melt(dat4a, id.vars = "title")
dat6 <- dat5[complete.cases(dat5), ]

# rowind is the running index of a value within its title/field group, in row
# order. The result stays grouped by (title, variable).
dat7 <- dat6 %>%
  group_by(title, variable) %>%
  mutate(rowind = row_number())

# Distinct, fully populated (title, year) pairs, computed once instead of
# repeating the same subset expression three times.
dat2e <- dat1[, c("title", "year")]
dat2e <- dat2e[complete.cases(dat2e), ]
dat2e <- dat2e[!duplicated(dat2e), ]

dat8 <- left_join(dat7, dat2e, by = "title")

1.2 By Key Words

# Yearly counts for every value in dat8, restricted to years after 1990.
# NOTE(review): dat8 is not filtered on `variable` here, so subject_area and
# author values are counted alongside terms -- confirm that this is intended
# for a "key words" table.
dat8a <- subset(dat8, year > 1990)

# Count rows per (value, year) and spread the years into columns.
# `.groups = "drop_last"` keeps the result grouped by `value`, exactly as the
# default does, without emitting the regrouping message.
dat9 <- dat8a[, c("value", "year")] %>%
  group_by(value, year) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  spread(year, count)

# A value that never occurs in a given year comes back as NA; show it as 0.
dat9[is.na(dat9)] <- 0

# Sum across every year column rather than a fixed 2:6 range, so the total is
# still correct when the data holds more (or fewer) than five distinct years.
year_cols <- setdiff(names(dat9), "value")
dat9$Total <- rowSums(dat9[, year_cols])
datatable(dat9, extensions = c('Scroller', 'FixedColumns'), options = list(
  deferRender = TRUE,
  scrollY = TRUE,
  scrollX = TRUE,
  fixedColumns = list(leftColumns = 2, rightColumns = 1)
))

1.3 By Author Numbering (First, …)

# Author rows only. For these rows rowind is the running index of the author
# entry within its title.
dat8a <- subset(dat8, variable == "author")

# Number of times each author appears at each index, one column per index.
# group_by() replaces the existing grouping, so no column subset is needed
# before summarising.
dat9 <- dat8a %>%
  group_by(value, rowind) %>%
  summarise(count = n()) %>%
  spread(rowind, count)

## `summarise()` regrouping output by 'value' (override with `.groups` argument)

# Index positions an author never occupied come back as NA; show them as 0.
dat9[is.na(dat9)] <- 0
datatable(
  dat9,
  extensions = c("Scroller", "FixedColumns"),
  options = list(
    deferRender = TRUE,
    scrollY = TRUE,
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2, rightColumns = 1)
  )
)

1.4 By Author Year

# Author rows published after 1990 (both conditions applied in one pass).
dat8a <- subset(dat8, variable == "author" & year > 1990)

# Number of rows per author and year, one column per year. group_by()
# replaces the existing grouping, so no column subset is needed first.
dat9 <- dat8a %>%
  group_by(value, year) %>%
  summarise(count = n()) %>%
  spread(year, count)

## `summarise()` regrouping output by 'value' (override with `.groups` argument)

# Years in which an author has no rows come back as NA; show them as 0.
dat9[is.na(dat9)] <- 0
datatable(
  dat9,
  extensions = c("Scroller", "FixedColumns"),
  options = list(
    deferRender = TRUE,
    scrollY = TRUE,
    scrollX = TRUE,
    fixedColumns = list(leftColumns = 2, rightColumns = 1)
  )
)

1.5 By Journal

# Count titles per journal (serial) and year.
dat4 <- dat1[, c("title", "serial")]
dat4a <- dat4[!duplicated(dat4), ]

# Call reshape2's melt explicitly: data.table is attached after reshape2 and
# masks melt(); its redirect for non-data.table input is deprecated and is
# announced to become an error, so the bare call would stop working.
dat5 <- reshape2::melt(dat4a, id.vars = "title")
dat6 <- dat5[complete.cases(dat5), ]
dat7 <- dat6 %>%
  group_by(title, variable) %>%
  mutate(rowind = row_number())

# Distinct, fully populated (title, year) pairs, computed once instead of
# repeating the same subset expression three times.
dat2e <- dat1[, c("title", "year")]
dat2e <- dat2e[complete.cases(dat2e), ]
dat2e <- dat2e[!duplicated(dat2e), ]

dat8 <- left_join(dat7, dat2e, by = "title")
# The melted `value` column holds the journal name; rename it by name rather
# than by position so a change in column order cannot rename the wrong column.
names(dat8)[names(dat8) == "value"] <- "serial"

# `.groups = "drop_last"` keeps the result grouped by `serial`, exactly as the
# default does, without emitting the regrouping message.
dat9 <- dat8[, c("serial", "year")] %>%
  group_by(serial, year) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  spread(year, count)

# A journal with no titles in a given year comes back as NA; show it as 0.
dat9[is.na(dat9)] <- 0

# Sum across every year column rather than a fixed 2:6 range, so the total is
# still correct when the data holds more (or fewer) than five distinct years.
year_cols <- setdiff(names(dat9), "serial")
dat9$Total <- rowSums(dat9[, year_cols])
datatable(dat9, extensions = c('Scroller', 'FixedColumns'), options = list(
  deferRender = TRUE,
  scrollY = TRUE,
  scrollX = TRUE,
  fixedColumns = list(leftColumns = 2, rightColumns = 1)
))

1.6 Read the titles

# Table of titles with the first listed author, journal and year.
dat4 <- dat1[, c("title", "author")]
dat4a <- dat4[!duplicated(dat4), ]

# Call reshape2's melt explicitly: data.table is attached after reshape2 and
# masks melt(); its redirect for non-data.table input is deprecated and is
# announced to become an error, so the bare call would stop working.
dat5 <- reshape2::melt(dat4a, id.vars = "title")
dat6 <- dat5[complete.cases(dat5), ]
dat7 <- dat6 %>%
  group_by(title, variable) %>%
  mutate(rowind = row_number())

# Keep the first author row of each title.
dat7a <- subset(dat7, rowind == 1)

# Distinct, fully populated (title, serial, year) rows, computed once instead
# of repeating the same subset expression three times.
dat2e <- dat1[, c("title", "serial", "year")]
dat2e <- dat2e[complete.cases(dat2e), ]
dat2e <- dat2e[!duplicated(dat2e), ]

dat8 <- left_join(dat7a, dat2e, by = "title")
# The melted `value` column holds the author name; rename it by name rather
# than by position.
names(dat8)[names(dat8) == "value"] <- "first_author"

# Columns are picked by name (previously positions 6, 1, 3, 5) so the table
# does not depend on the column order produced by the join.
datatable(
  dat8[, c("year", "title", "first_author", "serial")],
  extensions = c('Select', 'Buttons'), options = list(
    select = list(style = 'os', items = 'row'),
    dom = 'Blfrtip',
    rowId = 0,
    buttons = c('selectRows', 'csv', 'excel')
  ),
  selection = 'none'
)

1.7 Read the abstracts

# Table of abstracts built from dat2c: title, abstract, year and serial
# (columns 1 and 3 to 5 of the merged frame), with row selection and
# CSV/Excel export buttons.
datatable(
  dat2c[, c("title", "abstract", "year", "serial")],
  extensions = c("Select", "Buttons"),
  options = list(
    select = list(style = "os", items = "row"),
    dom = "Blfrtip",
    rowId = 0,
    buttons = c("selectRows", "csv", "excel")
  ),
  selection = "none"
)

BikeShare

Subasish Das (@subasish_das)

2020-06-18

1 Titles and Abstracts

1.1 How many?

1.2 By Key Words

1.3 By Author Numbering (First, …)

1.4 By Author Year

1.5 By Journal

1.6 Read the titles

1.7 Read the abstracts