Network Analysis Group Project

Packages

library(tidyverse)
library(dplyr)
library(readr)
library(tidyr)
library(tidyselect)
library(igraph)

Setting working directory and tidying data

# Points the session at the raw-data folder; the file listing further down
# reads from the current working directory.
# NOTE(review): this path is specific to one user's machine, so the script
# only runs where this exact folder exists -- consider a project-relative path.
setwd("~/Desktop/R/Network Analysis Group Project/Raw Data") 

# Builds `alt_data`: one row per (paper, funder, organisation) combination,
# restricted to papers with a known funder and at least one news mention.

# merges rows from all csv files in the working directory; `pattern` keeps
# non-csv files (hidden files, scripts, ...) from being passed to read_csv()
alt_data <- list.files(pattern = "\\.csv$", full.names = TRUE,
                       ignore.case = TRUE) %>%
  lapply(read_csv) %>%
  bind_rows() %>%
  # converts into a dataframe; data.frame() also makes the column names
  # syntactic (check.names = TRUE), which is presumably where the dotted
  # names used below come from
  data.frame() %>%

  # drops variables that we won't need; every name carries a leading `-`
  # (a bare name in a negative selection would be kept, not dropped)
  select(-Journal.Collection.Title, -Journal.ISSNs, -Authors.at.my.Institution,
         -Departments, -OA.Status, -ISBN, -National.Clinical.Trial.ID, -URI,
         -PubMed.ID, -PubMedCentral.ID, -Handle.net.IDs, -ADS.Bibcode,
         -arXiv.ID, -RePEc.ID, -SSRN, -URN, -Details.Page.URL, -Badge.URL,
         -Subjects..FoR.) %>%

  # removes rows for observations where funders are unknown
  filter(!is.na(Funder)) %>%
  # numbers the papers before any row splitting, so all rows that originate
  # from the same paper share one id
  mutate(id = row_number()) %>%

  # splits papers with multiple funders into multiple rows
  separate_rows(Funder, sep = ";", convert = FALSE) %>%

  # splits papers with multiple organisations into multiple rows
  separate_rows(Affiliations..GRID., sep = ";", convert = FALSE) %>%
  rename(Organisation = Affiliations..GRID.) %>%

  # keeps only papers that were mentioned at least once (`>= 1`; the previous
  # `> 1` required at least two mentions). Rows with NA mentions are dropped.
  # NOTE(review): the earlier comment spoke of policy documents, but the
  # column filtered on is News.mentions -- confirm which column is intended.
  filter(News.mentions >= 1)

To be continued