# General
library(readxl)
library(DT)
library(dplyr)
library(reshape2)
library(data.table)
# Switch the session to the project folder so the workbook below can be
# referenced by its bare file name.
setwd(
  "C:/Users/s-das/Syncplicity Folders/MyProjects_IMP/MY_Papers_V2/TRB 2021/00_Topics/XML"
)
# Load the "fin00" worksheet of the export into dat1.
dat1 <- read_excel(path = "Abdel-Aty_032020.xlsx", sheet = "fin00")
# Build dat2c: for every title, keep the most completely filled-in
# bibliographic record(s).

# Bibliographic columns only.
dat2 <- dat1[, c("title", "abstract", "year", "serial", "publisher")]
# Drop rows that are exact duplicates across all five columns.
dat2a <- dat2[!duplicated(dat2), ]
# Completeness score: number of populated (non-NA) fields in each row.
# Counted directly from the data instead of subtracting the NA count from a
# hard-coded constant, so the score stays correct whatever the column count,
# and without apply(), which would coerce the whole frame to a character
# matrix first.
dat2a$non_na <- unname(rowSums(!is.na(dat2a)))
# Best completeness score observed per title. The formula interface of
# aggregate() omits rows with missing values by default, so rows without a
# title do not appear here.
dat2b <- aggregate(non_na ~ title, dat2a, max)
# merge() joins on the shared columns (title, non_na), which keeps only the
# rows that reach the per-title maximum; tied rows are all retained.
dat2c <- merge(dat2b, dat2a)
## write.csv(dat2c, "Teen_Novice.csv")
# Long-format table of index terms per title, with publication year attached.

# Descriptor columns, with exact duplicate rows removed.
dat4 <- dat1[, c("title", "term", "subject_area", "author")]
dat4a <- dat4[!duplicated(dat4), ]

# Stack term / subject_area / author into (variable, value) pairs per title,
# then discard pairs that have a missing entry.
dat5 <- melt(dat4a, id.vars = "title")
dat6 <- dat5[complete.cases(dat5), ]

# Running index of each entry within its title/variable group.
dat7 <- dat6 %>%
  group_by(title, variable) %>%
  mutate(rowind = row_number())

# Title -> year lookup: complete rows only, exact duplicates removed.
dat2e <- dat1[, c("title", "year")]
dat2e <- dat2e[complete.cases(dat2e), ]
dat2e <- dat2e[!duplicated(dat2e), ]

# Attach the year to every entry and keep the index terms only.
dat8 <- left_join(dat7, dat2e, by = "title")
dat8a <- subset(dat8, variable == "term")
datatable(dat8a)
library(tidyr)
# Term-by-year frequency table: one row per term, one column per year, plus
# a WordCount column holding the row total.
dat9 <- dat8a[, c("value", "year")] %>%
  group_by(value, year) %>%
  summarise(count = n()) %>%
  # summarise() leaves the result grouped by value; drop the grouping so the
  # steps below operate on a plain table.
  ungroup() %>%
  spread(year, count)
# Term/year combinations that never occur come out of spread() as NA.
dat9[is.na(dat9)] <- 0
# Total occurrences per term. Every column except the term label is a year
# count, so select them by name rather than by a fixed position range that
# only holds for one particular span of years.
year_cols <- setdiff(names(dat9), "value")
dat9$WordCount <- rowSums(dat9[, year_cols, drop = FALSE])
datatable(
  dat9,
  extensions = c("Scroller", "FixedColumns", "Buttons"),
  options = list(
    # "B" places the Buttons toolbar; without it the export buttons
    # configured below are never rendered. The remaining letters are the
    # DataTables default layout.
    dom = "Blfrtip",
    deferRender = TRUE,
    scrollY = TRUE,
    scrollX = TRUE,
    # Keep the row-name and term columns pinned on the left and WordCount
    # pinned on the right while scrolling horizontally.
    fixedColumns = list(leftColumns = 2, rightColumns = 1),
    buttons = c("csv", "excel")
  )
)