This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
Note that the echo = FALSE
parameter was added to code
chunks to prevent printing of the R code that generated the plot.
# Goal: to add variables concerning date of publication and output back to the same dataset
# install (for the first time only) and load packages:
#install.packages("tidyverse")
#install.packages("readxl")
#install.packages("lubridate")
#library(tidyverse)
#library(readxl)
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# read in (or import) raw Publication *.csv file:
pubs <- read_excel("2021.09.23 Publications.xlsx")
# optional: missing values for the variables year_cite, month, or day will result in cases failing to parse. replace NA in the variable, day, with the value of 1 before creating new variables with day:
#pubs$day[is.na(pubs$day)] <- 1
# create new variables (i.e., year_month_date, year_month, date_publish) that strings existing variables (i.e., year_cite, month, day) in YMD format, separated by single spaces:
pubs <- pubs %>%
mutate(year_month_date = paste(year_cite, month, day, sep= ' '),
year_month = paste(year_cite, month, sep= ' '),
# if day is NA, use the new year_month_day variable into YMD format:
date_publish = case_when(!is.na(day) ~ ymd(year_month_date),
# if day and month are NA, use the new year_month variable into YM format:
is.na(day) & !is.na(month) ~ ym(year_month)) )
## Warning: 13 failed to parse.
# create a new variable (i.e, weeks_from_start) that computes the age of RADx-UP at the time of each publication. this will be useful for summary statistics, plots, etc. at different cut points (or scales) of time.
# define 'start_date' of RADx-UP, roughly September 2020:
start_date <- "2020-08-27"
# calculate weeks from start_date:
pubs <- pubs %>%
mutate(weeks_from_start =
as.numeric(difftime(date_publish, start_date, units = "weeks")))
# create a function (i.e., month_function) to define how long a "month" is (?):
month_function <- function(start_date, end_date) {
result <- NA
if (!is.na(end_date)){
result <- length(seq.Date(as.Date(start_date), end_date, by = "month"))}
result
}
# apply month_function to the dataframe (i.e., pubs) to create a new variable (i.e., months_from_start) for how many months old the pubs are:
pubs <- pubs %>%
mutate(months_from_start =
as.numeric(lapply(date_publish, month_function, start_date=start_date)))
# select useful columns to keep in the dataframe
# (i.e., everything but the YMD and YM format variables used to compute other variables):
pubs <- pubs %>%
select(!c(year_month_date, year_month))
# output (or export) the pubs dataframe you created to a *.csv file:
write_csv(pubs, "2021.09.23 Publications_with_date_variables.csv")
# Goal: to clean raw content analysis data and merge with recoded Publication *.csv file (see #2.)
# install (for the first time only) and load packages:
#install.packages("tidyverse")
#install.packages("readxl")
#install.packages("lubridate")
#library(tidyverse)
#library(readxl)
#library(lubridate)
# read in raw content analysis Excel file for the publications:
content <- read_excel("2021.09.23 Content_Analysis-CLEAN.xlsx")
## New names:
## • `Rural` -> `Rural...21`
## • `Rural` -> `Rural...29`
################################################################################
# create a new dataframe (i.e., content_recode) that renames long variables that were survey items:
content_recode <- content %>%
rename(health_equity_addressed = `Health Equity Focus`,
type_of_analysis = `Type of Analysis Identify for analytical studies whether the type of data analysis was qualitative, quantitative, both (mixed methods, literature review, or editorial.`) %>%
mutate(type_of_analysis = case_when(
type_of_analysis == "N/A" ~ as.character(NA),
TRUE ~ type_of_analysis))
# check--summarizes counts of each type of analysis (make sure it sums up to the number of papers in the analytical dataset):
table(content_recode$type_of_analysis)
##
## Clinical case study Editorial Qualitative Quantitative
## 1 5 3 16
################################################################################
# create a new variable that recodes the dichotomous/check-all column options into a single categorical variable (i.e., UP_urban_rural).
# then create additional dichotomous variables (i.e., target_pop_urban, target_pop_rural), based on the created urban_rural variable so that "Both" counts as "target_pop_urban" and as "target_pop_rural":
content_recode <- content_recode %>%
rename(Rural = Rural...29) %>%
mutate(UP_urban_rural = case_when(
(Rural == 1 & Urban==1) ~ "Both",
(Rural == 0 & Urban==1) ~ "Urban",
(Rural == 1 & Urban==0) ~ "Rural",
(Rural == 0 & Urban==0) ~ as.character(NA)),
target_pop_urban = case_when(
UP_urban_rural %in% c("Both","Urban") ~ 1,
TRUE ~ 0),
target_pop_rural = case_when(
UP_urban_rural%in% c("Both","Rural") ~ 1,
TRUE ~ 0))
# check--summarizes cross-tab counts of new categorical variable to previous dichotomous variables:
table(content_recode$UP_urban_rural,
content_recode$Urban, content_recode$Rural)
## , , = 0
##
##
## 0 1
## Both 0 0
## Rural 0 0
## Urban 0 3
##
## , , = 1
##
##
## 0 1
## Both 0 1
## Rural 2 0
## Urban 0 0
table(content_recode$Urban,
content_recode$target_pop_urban)
##
## 0 1
## 0 21 0
## 1 0 4
table(content_recode$Rural,
content_recode$target_pop_rural)
##
## 0 1
## 0 22 0
## 1 0 3
################################################################################
# create a new variable that recodes the dichotomous/check-all column options into a single categorical variable (i.e., UP_age):
content_recode <- content_recode %>%
mutate(UP_age = case_when(
`Children and adolescents (< 18 yo)` == 1 ~ "UP_children",
`Older Adults` == 1 ~ "UP_elders",
TRUE ~ "not_UP_age")) %>%
# rename the dichotomous/check-all columns:
rename(target_pop_children = `Children and adolescents (< 18 yo)`,
target_pop_elders = `Older Adults` )
# check--summarizes cross-tab counts of new categorical variable to previous dichotomous variables:
table(content_recode$UP_age,
content_recode$target_pop_children)
##
## 0 1
## not_UP_age 17 0
## UP_children 0 1
## UP_elders 7 0
table(content_recode$UP_age,
content_recode$target_pop_elders)
##
## 0 1
## not_UP_age 17 0
## UP_children 1 0
## UP_elders 0 7
################################################################################
# create a new variable that recodes the dichotomous/check-all column options into a single categorical variable (i.e., UP_SES):
content_recode <- content_recode %>%
mutate(UP_SES = case_when(
`Low socioeconomic status` == 1 ~ "UP_low_SES",
`Low socioeconomic status` == 0 ~ "not_UP_SES")) %>%
# rename the dichotomous/check-all columns:
rename(target_pop_SES = `Low socioeconomic status`)
# check--summarizes cross-tab counts of new categorical variable to previous dichotomous variables:
table(content_recode$UP_SES,
content_recode$target_pop_SES)
##
## 0 1
## not_UP_SES 22 0
## UP_low_SES 0 3
################################################################################
# create a new variable that recodes the dichotomous/check-all column options into a single categorical variable (i.e., UP_HCW):
content_recode <- content_recode %>%
mutate(UP_HCW = case_when(
`Health care worker` == 1 ~ "UP_healthcare_worker",
`Health care worker` == 0 ~ "not_UP_healthcare_worker")) %>%
# rename the dichotomous/check-all columns:
rename(target_pop_HCW = `Health care worker`)
# check--summarizes cross-tab counts of new categorical variable to previous dichotomous variables:
table(content_recode$UP_HCW,
content_recode$target_pop_HCW)
##
## 0 1
## not_UP_healthcare_worker 20 0
## UP_healthcare_worker 0 5
################################################################################
# create a new variable that recodes the dichotomous/check-all column options into a single categorical variable (i.e., UP_teacher):
content_recode <- content_recode %>%
mutate(UP_teacher = case_when(
Teacher == 1 ~ "UP_teacher",
Teacher == 0 ~ "not_UP_teacher")) %>%
# rename the dichotomous/check-all columns:
rename(target_pop_teacher = Teacher)
# check--summarizes cross-tab counts of new categorical variable to previous dichotomous variables:
table(content_recode$UP_teacher,
content_recode$target_pop_teacher)
##
## 0 1
## not_UP_teacher 24 0
## UP_teacher 0 1
################################################################################
# create a new variable that recodes the dichotomous/check-all column options into a single categorical variable (i.e., UP_race):
content_recode <- content_recode %>%
mutate(UP_race = case_when(
(`Asian American` == 1 |
`Native Hawaiian` == 1 |
`Pacific Islander` == 1) &
`Black or African American` == 0 &
`Hispanic or Latino`== 0 ~ "UP_Asian",
(`American Indian` == 1 |
`Alaska Native` == 1) &
`Black or African American` == 0 &
`Hispanic or Latino`== 0 ~ "UP_Indigenous",
`Black or African American` == 1 &
`Hispanic or Latino` == 0 ~ "UP_Black",
`Black or African American` == 0 &
`Hispanic or Latino` == 1 ~ "UP_Latinx",
`Black or African American` == 1 &
`Hispanic or Latino` == 1 &
`American Indian` == 0 &
`Asian American` == 0 ~ "UP_Black Latinx",
`Black or African American` == 1 &
`Hispanic or Latino` == 1 &
`American Indian` == 1 &
`Asian American` == 0 ~ "UP_Black Indigenous Latinx",
`Black or African American` == 1 &
`Hispanic or Latino` == 1 &
`Asian American` == 1 &
`American Indian` == 0 ~ "UP_Black Latinx Asian",
TRUE ~ "no_race_target"),
target_race_binary = case_when(
UP_race == "no_race_target" ~ 0,
TRUE ~ 1)) %>%
# rename the dichotomous/check-all columns:
rename(target_pop_Black = `Black or African American`,
target_pop_Latinx = `Hispanic or Latino`,
target_pop_Indigenous = `American Indian`,
target_pop_Asian = `Asian American`)
# check--summarizes cross-tab counts of new categorical variable to previous dichotomous variables:
table(content_recode$UP_race,
content_recode$target_pop_Black)
##
## 0 1
## no_race_target 14 0
## UP_Black 0 1
## UP_Black Indigenous Latinx 0 1
## UP_Black Latinx 0 5
## UP_Indigenous 2 0
## UP_Latinx 2 0
table(content_recode$UP_race,
content_recode$target_pop_Latinx)
##
## 0 1
## no_race_target 14 0
## UP_Black 1 0
## UP_Black Indigenous Latinx 0 1
## UP_Black Latinx 0 5
## UP_Indigenous 2 0
## UP_Latinx 0 2
table(content_recode$UP_race,
content_recode$target_pop_Indigenous)
##
## 0 1
## no_race_target 14 0
## UP_Black 1 0
## UP_Black Indigenous Latinx 0 1
## UP_Black Latinx 5 0
## UP_Indigenous 0 2
## UP_Latinx 2 0
table(content_recode$UP_race,
content_recode$target_pop_Asian)
##
## 0
## no_race_target 14
## UP_Black 1
## UP_Black Indigenous Latinx 1
## UP_Black Latinx 5
## UP_Indigenous 2
## UP_Latinx 2
table(content_recode$UP_race,
content_recode$target_race_binary)
##
## 0 1
## no_race_target 14 0
## UP_Black 0 1
## UP_Black Indigenous Latinx 0 1
## UP_Black Latinx 0 5
## UP_Indigenous 0 2
## UP_Latinx 0 2
################################################################################
# create a new dataframe (i.e., site_recode) that keeps and renames dichotomous/check-all variables:
site_recode <- content_recode %>%
select(community_health_center = `Community health center`,
hospital = `Hospital` ,
in_home = `In-home` ,
prison = `Prison/jail` ,
care_facility = `Residential care facility`,
school = `School` ,
other = `Other Setting 1`) %>%
mutate(hospital = case_when(
other == "university-based hospital system" ~ 1,
TRUE ~ hospital),
community_events = case_when(
other %in% c("Community organization",
"Free community-based Latino testing events",
"Street outreach") ~ 1 ,
TRUE ~ 0),
vaccination_sites = case_when(
other %in% c("Vaccination sites: churches, clinics, and community events were the COVID-19 vaccine was being administered",
"Vaccination sites [outpatient primary care clinics, churches, community events, and outdoor vaccination drive through locations]") ~ 1 ,
TRUE~0),
other = case_when(
is.na(other) |
hospital == 1 |
community_events == 1|
vaccination_sites == 1 ~ 0,
!is.na(other) ~ 1))
# create a function (i.e., which_site) to help with recoding... (?):
which_site <- function(x, y){names(which(x==y))[1]}
#here we locate which column (e.g., community health center, school, etc.)
#there is a value of 1 and grab the column's name for the new categorical variable, study_site
site_recode <- site_recode %>%
mutate(study_site = apply(site_recode, 1, which_site, y = 1))
# note that a few publications had multiple sites
# "community_health_center" "other": coded as community_health_center
# "hospital" "care_facility": coded as hospital
# add the categorical study_site variable to the content_recode dataframe:
content_recode <- content_recode %>%
mutate(study_site = site_recode$study_site)
#quick check:
table(content_recode$study_site,
content_recode$`School`)
##
## 0 1
## care_facility 8 0
## community_events 1 0
## hospital 2 0
## school 0 1
table(content_recode$study_site,
content_recode$`Community health center`)
##
## 0
## care_facility 8
## community_events 1
## hospital 2
## school 1
################################################################################
# create a new dataframe (i.e., design_recode) that keeps and renames dichotomous/check-all variables:
design_recode <- content_recode %>%
select(experimental = `Experimental`,
quasi_experimental = `Quasi-Experimental` ,
simulation = `Simulation` ,
observational = `Observational` ,
evaluation = `Evaluation` ,
formative = `Formative/exploratory`,
dissemination = `Dissemination & implementation`,
cost_benefit = `Cost-benefit` ,
quant_other = `Other Quantitative Method`,
focus_group = `Focus group` ,
interview = `Interview`,
survey = `Survey`,
qual_other = `Other Qualitative`)
# create a function (i.e., which_design) to help with recoding... (?):
which_design <-function(x, y){paste(names(which(x==y)), collapse = ".")}
# here, we take each study design, note if the pub used it and paste it with other study design types, since studies can have multiple study designs (?)
design_recode <- design_recode %>%
mutate(study_design = apply(design_recode , 1, which_design, y = 1) ,
study_design = case_when(
study_design == "" ~ "no_study_design",
TRUE ~ study_design))
# bind the whole design_recode dataframe to the content_recode dataframe:
content_recode <- bind_cols(content_recode, design_recode,
.name_repair = "minimal")
#quick check:
table(design_recode$study_design,
design_recode$observational)
##
## 0 1
## experimental 1 0
## focus_group 2 0
## focus_group.interview 1 0
## no_study_design 6 0
## observational 0 13
## quasi_experimental.evaluation 1 0
## simulation 1 0
################################################################################
# create a new dataframe (i.e., engage_recode) that keeps and renames dichotomous/check-all variables and dummies comments in "other":
engage_recode <- content_recode %>%
select(engage_broadcast_media = `Broadcast media (e.g., press conferences, TV interviews`,
engage_entertainment = `Entertainment activities (e.g., picnics, raffles` ,
engage_focus_groups = `Focus groups and/or surveys` ,
engage_meetings_events = `In-person or online community presentations or meetings (professional or educational events` ,
engage_social_media = `Internet/social media (e.g., email blast, blog, Facebook, YouTube`,
engage_partnerships = `Partnerships with community-based organizations` ,
engage_print_media = `Print media (e.g., press releases, newspaper articles`,
engage_other1 = `Other Community Engagement 1`,
engage_other2 = `Other Community Engagement 2`) %>%
mutate(engage_advisory_board = case_when(
engage_other1 %in% c("ad-hoc advisory group of community leaders in each SYCT county todevelop a community-informed strategy for distribution of at-home test kits",
"Community Advisory Boards") ~ 1,
TRUE ~ 0),
engage_peer_influence = case_when(
engage_other1 %in% c("Identifying frontline champions",
"Peer influencers",
"Community member referral networks") ~ 1,
TRUE ~ 0),
engage_transportation = case_when(
engage_other1 %in% c("Street outreach; mobile vans",
"transportation to vaccination sites; a network of public health/clinical health service providers to provide vaccinations; widespread vaccinations sites (ex. malls; removal of residential status as a barrier to obtain vaccines; public health worker delivery of vaccines in rural areas") ~ 1,
TRUE ~ 0),
engage_partnerships = case_when(
engage_other1 %in% c("COVID-19 data collection through the partnership for decision-making",
"Multi-disciplinary collaborative research and partnerships focused on addressing community needs and building a translational science workforce to enhance project implementation and reach; faculty members engage closely with local health departments,",
"Conducted an Asian-American and Native Hawaiian/Pacific Islander COVID-19 Needs Assessment") ~ 1,
TRUE ~ engage_partnerships),
engage_meetings_events = case_when(
engage_other1 %in% c("Free community-based Latino testing events") ~ 1,
TRUE ~ engage_meetings_events),
engage_other1 = case_when(
is.na(engage_other1) |
engage_advisory_board == 1 |
engage_peer_influence == 1 |
engage_partnerships == 1 |
engage_meetings_events == 1 |
engage_transportation == 1 ~ 0,
!is.na(engage_other1) ~ 1),
engage_other2 = case_when(is.na(engage_other2) ~ 0,
!is.na(engage_other2) ~ 1))
engage_recode <- engage_recode %>%
mutate(engage_number = apply(engage_recode[, ], 1, sum),
engage_binary = 1 * (engage_number > 0),
cmty_engage = apply(engage_recode, 1, which_design, y = 1),
cmty_engage = case_when(
cmty_engage == " " ~ "no_engagement",
TRUE ~ cmty_engage))
print(engage_recode, width=100000, n =15)
## # A tibble: 25 × 15
## engage_broadcast_media engage_entertainment engage_focus_groups
## <dbl> <dbl> <dbl>
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 1
## 6 1 0 0
## 7 0 0 0
## 8 0 0 0
## 9 0 0 1
## 10 0 0 0
## 11 0 0 0
## 12 0 0 0
## 13 0 0 0
## 14 0 0 0
## 15 0 0 0
## engage_meetings_events engage_social_media engage_partnerships
## <dbl> <dbl> <dbl>
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 1 0 1
## 5 1 0 0
## 6 0 1 1
## 7 0 0 0
## 8 0 0 0
## 9 0 0 0
## 10 0 0 0
## 11 0 0 0
## 12 0 0 0
## 13 0 0 0
## 14 0 0 0
## 15 0 0 0
## engage_print_media engage_other1 engage_other2 engage_advisory_board
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## 7 0 0 0 0
## 8 0 0 0 0
## 9 0 0 0 0
## 10 0 0 0 0
## 11 0 0 0 0
## 12 0 0 0 0
## 13 0 0 0 0
## 14 0 0 0 0
## 15 0 0 0 0
## engage_peer_influence engage_transportation engage_number engage_binary
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 2 1
## 5 0 0 2 1
## 6 0 0 3 1
## 7 0 0 0 0
## 8 0 0 0 0
## 9 0 0 1 1
## 10 0 0 0 0
## 11 0 0 0 0
## 12 0 0 0 0
## 13 0 0 0 0
## 14 0 0 0 0
## 15 0 0 0 0
## cmty_engage
## <chr>
## 1 ""
## 2 ""
## 3 ""
## 4 "engage_meetings_events.engage_partnerships"
## 5 "engage_focus_groups.engage_meetings_events"
## 6 "engage_broadcast_media.engage_social_media.engage_partnerships"
## 7 ""
## 8 ""
## 9 "engage_focus_groups"
## 10 ""
## 11 ""
## 12 ""
## 13 ""
## 14 ""
## 15 ""
## # … with 10 more rows
table(engage_recode$cmty_engage)
##
##
## 17
## engage_broadcast_media.engage_social_media.engage_partnerships
## 1
## engage_focus_groups
## 2
## engage_focus_groups.engage_meetings_events
## 1
## engage_focus_groups.engage_meetings_events.engage_social_media.engage_print_media.engage_peer_influence
## 1
## engage_focus_groups.engage_partnerships
## 1
## engage_meetings_events.engage_partnerships
## 1
## engage_partnerships
## 1
table(engage_recode$cmty_engage,
engage_recode$engage_binary)
##
## 0
## 17
## engage_broadcast_media.engage_social_media.engage_partnerships 0
## engage_focus_groups 0
## engage_focus_groups.engage_meetings_events 0
## engage_focus_groups.engage_meetings_events.engage_social_media.engage_print_media.engage_peer_influence 0
## engage_focus_groups.engage_partnerships 0
## engage_meetings_events.engage_partnerships 0
## engage_partnerships 0
##
## 1
## 0
## engage_broadcast_media.engage_social_media.engage_partnerships 1
## engage_focus_groups 2
## engage_focus_groups.engage_meetings_events 1
## engage_focus_groups.engage_meetings_events.engage_social_media.engage_print_media.engage_peer_influence 1
## engage_focus_groups.engage_partnerships 1
## engage_meetings_events.engage_partnerships 1
## engage_partnerships 1
# bind the whole design_recode dataframe to the content_recode dataframe:
content_recode <- bind_cols(content_recode, engage_recode,
.name_repair = "minimal")
################################################################################
# TSBM (translational science benefits model): vaccines
# need to create 4 basic variables of 1 = did translational science model
# and 0 = did not do it
# then create variable of combinations, number of TSMB as well as binary indicator
# here for vaccines
# here we recode the main variables for clinical, public health, economic
# and policy. recode so that a 1 is did that benefit and 0 otherwise
content_recode <- content_recode %>%
mutate(
TSBM_vaccines_clinical = case_when(
(`Vaccination Clinical Guidelines` == 1 |
`Vaccination Procedures` == 1 |
`Vaccine Technology`== 1) ~ 1,
TRUE ~ 0),
TSBM_vaccines_public_health = case_when(
(`Community Vaccination Services` == 1 |
`Vaccine Education Resources` == 1 |
`Vaccination Accessibility`== 1|
`Vaccine Delivery and Uptake`== 1|
`Vaccine Hesitancy`== 1|
`Software and Digital Health for Vaccination`==1|
`Public Health Vaccination Practices`==1) ~ 1,
TRUE ~ 0),
TSBM_vaccines_economic = case_when(
(`Vaccine License Agreements and Patents` == 1 |
`Vaccine Non-Profit or Commercial Entities` == 1 |
`Vaccine Cost Effectiveness`== 1|
`Vaccine Cost Savings`== 1) ~ 1,
TRUE ~ 0),
TSBM_vaccines_policy = case_when(
(`Vaccination Advisory Activities` == 1 |
`Vaccination Policies and Legislation` == 1) ~ 1,
TRUE ~ 0))
#now creating extra variables, number of TSBM benefits, pasted together
#benefits they did and binary indicator if did any benefit
TSBM_vaccine_columns <- c("TSBM_vaccines_clinical",
"TSBM_vaccines_public_health",
"TSBM_vaccines_economic",
"TSBM_vaccines_policy")
content_recode <- content_recode %>%
mutate(TSBM_vaccines_number = apply(content_recode[, TSBM_vaccine_columns], 1, sum),
TSBM_vaccines_binary = 1 * (TSBM_vaccines_number > 0),
TSBM_vaccines_recode = apply(content_recode[, TSBM_vaccine_columns], 1, which_design, y = 1) ,
TSBM_vaccines_recode = case_when(TSBM_vaccines_recode == "" ~ "no_TSBM_vaccines",
TRUE ~ TSBM_vaccines_recode))
#quick check:
print(content_recode[,c(TSBM_vaccine_columns,
"TSBM_vaccines_binary",
"TSBM_vaccines_recode",
"TSBM_vaccines_number")],
width=1000)
## # A tibble: 25 × 7
## TSBM_vaccines_clinical TSBM_vaccines_public_health TSBM_vaccines_economic
## <dbl> <dbl> <dbl>
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 1 0 0
## 5 0 1 0
## 6 0 0 0
## 7 0 0 0
## 8 0 1 0
## 9 0 1 0
## 10 0 0 0
## TSBM_vaccines_policy TSBM_vaccines_binary TSBM_vaccines_recode
## <dbl> <dbl> <chr>
## 1 0 0 no_TSBM_vaccines
## 2 0 0 no_TSBM_vaccines
## 3 0 0 no_TSBM_vaccines
## 4 0 1 TSBM_vaccines_clinical
## 5 0 1 TSBM_vaccines_public_health
## 6 0 0 no_TSBM_vaccines
## 7 0 0 no_TSBM_vaccines
## 8 0 1 TSBM_vaccines_public_health
## 9 0 1 TSBM_vaccines_public_health
## 10 0 0 no_TSBM_vaccines
## TSBM_vaccines_number
## <dbl>
## 1 0
## 2 0
## 3 0
## 4 1
## 5 1
## 6 0
## 7 0
## 8 1
## 9 1
## 10 0
## # … with 15 more rows
table(content_recode$TSBM_vaccines_binary,
content_recode$TSBM_vaccines_number)
##
## 0 1 2
## 0 10 0 0
## 1 0 12 3
table(content_recode$TSBM_vaccines_binary,
content_recode$TSBM_vaccines_recode)
##
## no_TSBM_vaccines TSBM_vaccines_clinical
## 0 10 0
## 1 0 4
##
## TSBM_vaccines_clinical.TSBM_vaccines_public_health
## 0 0
## 1 1
##
## TSBM_vaccines_public_health
## 0 0
## 1 8
##
## TSBM_vaccines_public_health.TSBM_vaccines_economic
## 0 0
## 1 1
##
## TSBM_vaccines_public_health.TSBM_vaccines_policy
## 0 0
## 1 1
################################################################################
# TSBM (translational science benefits model): testing
content_recode <- content_recode %>%
mutate(
TSBM_testing_clinical = case_when(
(`Testing Clinical Guidelines` == 1 |
`Testing Procedures` == 1 |
`Testing Technology`== 1) ~ 1,
TRUE ~ 0),
TSBM_testing_public_health = case_when(
(`Community Testing Services` == 1 |
`Testing Education Resources` == 1 |
`Testing Accessibility`== 1|
`Testing Hesitancy`== 1|
`Software and Digital Health for Testing`== 1|
`Public Health Testing Practices`== 1) ~ 1,
TRUE ~ 0),
TSBM_testing_economic = case_when(
(`Testing License Agreements and Patents` == 1 |
`Testing Non-Profit or Commercial Entities` == 1 |
`Testing Cost Effectiveness`== 1|
`Testing Cost Savings`== 1) ~ 1 ,
TRUE ~ 0),
TSBM_testing_policy = case_when(
(`Testing Advisory Activities` == 1 |
`Testing Policies and Legislation` == 1) ~ 1,
TRUE ~ 0) )
# now creating extra variables, number of TSBM benefits, pasted together
# benefits they did and binary indicator if did any benefit
TSBM_testing_columns <- c("TSBM_testing_clinical",
"TSBM_testing_public_health",
"TSBM_testing_economic",
"TSBM_testing_policy")
content_recode <- content_recode %>%
mutate(TSBM_testing_number = apply(content_recode[, TSBM_testing_columns], 1, sum),
TSBM_testing_binary = 1 * (TSBM_testing_number > 0),
TSBM_testing_recode = apply(content_recode[, TSBM_testing_columns], 1, which_design, y = 1) ,
TSBM_testing_recode = case_when(TSBM_testing_recode == "" ~ "no_TSBM_vaccines",
TRUE ~ TSBM_testing_recode))
# quick check:
print(content_recode[, c(TSBM_testing_columns,
"TSBM_testing_binary",
"TSBM_testing_recode",
"TSBM_testing_number")],
width=1000)
## # A tibble: 25 × 7
## TSBM_testing_clinical TSBM_testing_public_health TSBM_testing_economic
## <dbl> <dbl> <dbl>
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 1 0
## 5 0 0 0
## 6 0 1 1
## 7 0 1 0
## 8 0 0 0
## 9 0 0 0
## 10 0 0 0
## TSBM_testing_policy TSBM_testing_binary
## <dbl> <dbl>
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 1
## 5 0 0
## 6 0 1
## 7 0 1
## 8 0 0
## 9 0 0
## 10 0 0
## TSBM_testing_recode TSBM_testing_number
## <chr> <dbl>
## 1 no_TSBM_vaccines 0
## 2 no_TSBM_vaccines 0
## 3 no_TSBM_vaccines 0
## 4 TSBM_testing_public_health 1
## 5 no_TSBM_vaccines 0
## 6 TSBM_testing_public_health.TSBM_testing_economic 2
## 7 TSBM_testing_public_health 1
## 8 no_TSBM_vaccines 0
## 9 no_TSBM_vaccines 0
## 10 no_TSBM_vaccines 0
## # … with 15 more rows
table(content_recode$TSBM_testing_binary,
content_recode$TSBM_testing_number)
##
## 0 1 2
## 0 17 0 0
## 1 0 6 2
table(content_recode$TSBM_testing_binary,
content_recode$TSBM_testing_recode)
##
## no_TSBM_vaccines TSBM_testing_clinical.TSBM_testing_economic
## 0 17 0
## 1 0 1
##
## TSBM_testing_public_health TSBM_testing_public_health.TSBM_testing_economic
## 0 0 0
## 1 6 1
# rename some health variables:
content_recode <- content_recode %>%
rename(Neurological = `Neurological disorders (includes Guillain-Barre syndrome)`,
HIV = `HIV/AIDS`,
HepC = `Hepatitis C`,
Diabetes = `Diabetes, Hyperglycemia or Hypoglycemia`)
#reduce to key variables of interest:
vars <- c("pmid",
"UP_urban_rural",
"type_of_analysis" ,
"health_equity_addressed",
"UP_age",
"target_pop_children",
"target_pop_elders",
"UP_SES",
"target_pop_SES",
"UP_HCW",
"target_pop_HCW",
"UP_teacher",
"target_pop_teacher",
"target_pop_urban",
"target_pop_rural",
"UP_race",
"target_race_binary",
"target_pop_Black",
"target_pop_Latinx",
"target_pop_Indigenous",
"target_pop_Asian",
"study_site",
colnames(design_recode),
colnames(engage_recode),
TSBM_vaccine_columns,
"TSBM_vaccines_binary",
"TSBM_vaccines_recode",
"TSBM_vaccines_number",
TSBM_testing_columns,
"TSBM_testing_binary",
"TSBM_testing_recode",
"TSBM_testing_number",
"Neurological",
"HIV",
"HepC",
"Diabetes",
"Obesity",
"Preeclampsia")
content_recode <- content_recode %>%
select(all_of(vars))
################################################################################
# read in (or import) recoded Publication *.csv file:
pubs <- read_csv("2021.09.23 Publications_with_date_variables.csv") %>%
# duplicate PMIDs may be associated more than one project, but we only need them once since we're only interested in the attributes of the paper. if the following variables are distinct (i.e., unique PubMed ID numbers), then keep the values (?):
distinct(pmid, .keep_all = T)
## Rows: 25 Columns: 44
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (29): method, pi_accept, acknow_nih, acknow_radx, acknow_grant, covid, ...
## dbl (10): proj_num, cycle, year_cite, day, volume, issue, cite_count, pmid,...
## lgl (2): fund5, fund6
## dttm (2): date_id, date_update
## date (1): date_publish
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pubs <- pubs %>%
# select useful columns to keep in the dataframe:
select(cycle, year_cite, month, day,
date_publish, weeks_from_start, months_from_start, pmid, title)
################################################################################
# merge content analysis data onto publication data and export
pub_attributes <- left_join(x = content_recode, y = pubs, by = "pmid")
#only keep publication if on pub-author edgelist?
temp_edges <- read_csv("2021.09.23 Author_Affiliations_no_missing_ids.csv")
## Rows: 209 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): author_name, authors_aff, author_type
## dbl (3): pmid, author_id, author_id2
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# sort pub dataframe by pmid variable (?):
pub_attributes <- pub_attributes %>%
filter(pmid %in% temp_edges$pmid)
# output (or export) the publication_attributes dataframe you created to a *.csv file:
write_csv(pub_attributes, "2021.09.23 Publication_Attribute_Data.csv")
# Goal: to output a list showing which projects are associated with each publication (pmid)
# install (for the first time only) and load packages
#install.packages("tidyverse")
#install.packages("readxl")
#install.packages("lubridate")
#library(tidyverse)
#library(readxl)
#library(lubridate)
# read in (or import) recoded Publications *.csv file
#pubs <- read_csv("2021.09.23 Publications_with_date_variables.csv")
# only keep pubs that are in the content data set? read in the content analysis data for the publications:
#content <- read_excel("2021.09.23 Content_Analysis-CLEAN.xlsx")
# sort pubs dataframe by pmid variable (?):
pubs <- pubs %>%
filter(pmid %in% content$pmid)
# create a function (i.e., which_proj) to grab the project number, for a given variable (e.g., pmid):
which_proj <- function(dat, var, id){
dat$proj_num[dat[, var] == id]
}
# define unique set of pmids:
pmids <- unique(pubs$pmid)
# create a data frame (?) (i.e., list_of_projects) that grabs all projects associated with a pmid:
list_of_projects <- lapply(pmids, which_proj, dat = pubs, var = "pmid")
### not sure what to do with missing column: Warning: Unknown or uninitialised column: `proj_num`.
names(list_of_projects) <- pmids
# save out list of projects for each pmid:
save(list_of_projects, file = "2021.09.23 Project_Publication_List.Rdata")
# Goal: to produce some very initial network images and summary statistics
# with a focus on organizational homophily, or connections # based on author-publication ties,
# between different types of organizations.
# Questions to answer:
# How rare/common are cross organizational ties?
# What pattern of cross-organizational ties do we tend to see?
# Do we ever see ties involving community partners?
# Under what conditions do we see ties involving community partners?
# install (for the first time only) and load packages:
#install.packages("tidyverse")
#install.packages("igraph")
#install.packages("qgraph")
library(tidyverse)
library(igraph)
library(qgraph)
# load (or import) igraph file:
load("2021.09.23 Coauthorship Network igraph.Rdata")
# extract the affiliation matrix:
affil_matrix <- as_incidence_matrix(coauthorship_net, sparse = F)
type <- V(coauthorship_net)$type
#identify which nodes are authors:
is_author_type <- type == FALSE
#identify which nodes are publications:
is_publication_type <- type == TRUE
####################################
# Basic features of network #
# (to understand and plot network) #
####################################
# conduct component analysis, looking at reachability
components <- components(coauthorship_net)
# identify which components (and thus nodes) are based on single paper,
# i.e., not so interesting a structure to examine.
num_pubs <- function(mems, types = type, x) {
sum(types[which(mems == x)])
}
num_pubs_in_comp <- unlist(lapply(1:components$no, FUN = num_pubs,
mems = components$membership, types=type))
# a quick check:
# which(components$membership==3)
#############################
# Initial exploratory plots #
#############################
# Three versions of how we can plot:
# A) author-publication
# B) author-author
# C) publication-publication
#1. Node size and color in a basic plot of A)
# take off node labels:
V(coauthorship_net)$label <- NA
# set size of all nodes to 6:
#V(coauthorship_net)$size <- 6
# or make the author nodes smaller than the publication nodes:
V(coauthorship_net)$size[is_author_type] <- 1.75
# make the publication nodes larger than the author nodes:
V(coauthorship_net)$size[is_publication_type] <- 3.75
# set the color, by node type:
# authors (or FALSE) to UNC blue
unc_blue <- rgb(red = 75, green = 156, blue = 211,
alpha = 1 * 255, maxColorValue = 255)
# pubs (or TRUE) to Duke blue
duke_blue <- rgb(red = 0, green = 83, blue = 155,
alpha = 1 * 255, maxColorValue = 255)
# set how each node will be colored:
base_color <- case_when(is_author_type ~ unc_blue,
is_publication_type ~ duke_blue)
V(coauthorship_net)$color <- base_color
# set node borders equal to fill:
V(coauthorship_net)$frame.color <- V(coauthorship_net)$color
# other possible options:
# lighten up the edges a bit?
#E(coauthorship_net)$color <- rgb(.5, .5, .5, .25)
# make authors more transparent?
#V(affil_net96_noisolates)$color[is_student_type] <- rgb(1, 0, 0, .1)
plot(coauthorship_net)
##################################################################
#2. Add a layout and a legend
set.seed(109)
layout1 <- layout_with_fr(coauthorship_net)
# create a blank, placeholder PDF:
pdf(paste("2021.09.23 author_publication_network.pdf"))
#plot the network with the layout
plot(coauthorship_net, layout = layout1, margin = 0,
main = "Author-Publication Network, Base Plot")
# for legend with same-size nodes:
#legend("bottomright", col = c(unc_blue, duke_blue), legend = c("Author", "Publication"),
# pch = 21, pt.bg = c(unc_blue, duke_blue))
# for legend with actual-size nodes:
size_vec <-sort(unique(V(coauthorship_net)$size))
scaled <- 1 + ((2-1) * (size_vec - min(size_vec) ) / ( max(size_vec) - min(size_vec) ) )
legend("bottomleft", col = c(unc_blue, duke_blue), legend = c("Author", "Publication"),
pch = 21, pt.bg = c(unc_blue, duke_blue), pt.cex = scaled, cex = .9)
# close off graphic device:
dev.off()
## png
## 2
##################################################################
#3. Add titles for deep dive figures:
V(coauthorship_net)$vertex.label <- V(coauthorship_net)$name
V(coauthorship_net)$vertex.label[V(coauthorship_net)$type==FALSE] <-NA
plot(coauthorship_net, layout = layout1, margin = 0,
main = "Author-Publication Network, Base Plot",
vertex.label = V(coauthorship_net)$vertex.label,
vertex.label.cex=.8)
pdf(paste("2021.09.23 author_publication_network_with_labels.pdf"))
#if wanted to try and tweak the layout 'by hand' using ported network package code:
#e <- get.edgelist(coauthorship_net, names=FALSE)
#l <- qgraph.layout.fruchtermanreingold(e, vcount=vcount(coauthorship_net),
# area = 1.5*(vcount(coauthorship_net)^2),
# repulse.rad = (vcount(coauthorship_net)^3.0))
#plot(coauthorship_net, layout = l, margin = 0,
# main = "Author-Publication Network, Base Plot")
#############################################################
# 4. set the node color, by type of affiliation:
# academic is UNC blue
# community is orange
# government is dark gray
# healthcare is eggplant
# professional is teal
# school spring green (?)
# pub is still duke blue
orange <- rgb(red = 244, green = 126, blue = 40,
alpha = 1 * 255, maxColorValue = 255)
dark_gray <- rgb(red = 71, green = 77, blue = 87,
alpha = 1 * 255, maxColorValue = 255)
eggplant <- rgb(red = 81, green = 12, blue = 93,
alpha = 1 * 255, maxColorValue = 255)
teal <- rgb(red = 20, green = 169, blue = 162,
alpha = 1 * 255, maxColorValue = 255)
spring_green <- rgb(red = 157, green = 200, blue = 88,
alpha = 1 * 255, maxColorValue = 255)
color_org_based <- case_when(V(coauthorship_net)$final_type == "Academic" ~ unc_blue,
V(coauthorship_net)$final_type == "Community" ~ orange,
V(coauthorship_net)$final_type == "Government" ~ dark_gray,
V(coauthorship_net)$final_type == "Healthcare" ~ eggplant,
V(coauthorship_net)$final_type == "Professional" ~ teal,
V(coauthorship_net)$final_type == "School" ~ spring_green,
is.na(V(coauthorship_net)$final_type) ~ duke_blue)
V(coauthorship_net)$color <- color_org_based
V(coauthorship_net)$frame.color <- V(coauthorship_net)$color
pdf(paste("2021.09.23 author_publication_network_colored_by_author_affiliation.pdf"))
plot(coauthorship_net, layout = layout1, margin = c(0, .0, 0, 0),
main = "Author-Publication Network, by Type of Affiliation")
col_list2 <- c(unc_blue, orange, dark_gray, eggplant, teal, spring_green, duke_blue)
scaled2 <- c(rep(scaled[1], length(col_list2) - 1), scaled[2])
legend("bottomleft", col = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green, duke_blue),
legend = c("Academic", "Community", "Government", "Healthcare", "Professional", "School", "Publication"),
pch = 21, pt.bg = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green, duke_blue),
pt.cex = scaled2, cex = .80)
dev.off()
## png
## 2
#############################################################
# 5. Reduce plot to components, with 2 or more publications
#identify which nodes are in components that have less than 2 publications
multi_pubs <-components$membership %in% which(num_pubs_in_comp < 2)
V(coauthorship_net)$multi_pubs <- multi_pubs
V(coauthorship_net)$comp <- components$membership
ids_multi_pubs <- which(multi_pubs)
# create a subgraph of the full network that include nodes in components w/ 2+ pubs:
coauthorship_net_2pubs <- delete_vertices(coauthorship_net, ids_multi_pubs )
set.seed(113)
layout2 <- layout_with_fr(coauthorship_net_2pubs)
pdf(paste("2021.09.23 author_publication_network_2pubs_in_component.pdf"))
plot(coauthorship_net_2pubs, layout = layout2, margin = 0,
main = "Author-Publication Network, Components with >= 2 Publications")
legend("bottomleft", col = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green, duke_blue),
legend = c("Academic", "Community", "Government", "Healthcare", "Professional", "School", "Publication"),
pch = 21, pt.bg = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green, duke_blue),
pt.cex = scaled2, cex = .80)
dev.off()
## png
## 2
#############################################################
# B) the author-author network
# construct the one-mode projection:
onemode_nets <- bipartite_projection(coauthorship_net)
# and grab the author-author network:
author_net <- onemode_nets$proj1
# look at the weighted matrix:
author_mat <- as_adjacency_matrix(graph = author_net, attr = "weight", sparse = F)
table(author_mat)
## author_mat
## 0 1 2 3 4
## 29543 2416 62 18 2
# 1. plot just authors, with weights
plot(author_net , layout = layout_with_fr, margin = 0,
edge.width = E(author_net)$weight)
# 2. plot binary network, no weights, and make nodes smaller with lighter borders
V(author_net)$size <- 1
E(author_net)$color <- rgb(.5, .5, .5, .10)
plot(author_net , layout = layout_with_fr, margin = 0,
edge.width = 1)
# 3. plot the nth largest components
set_number_ofcomponents <- 10
largest_components <- which(components$csize %in% sort(components$csize, decreasing=T)[1:set_number_ofcomponents ])
author_net_largecomponents <- delete_vertices(author_net,
which( !(V(author_net)$comp %in% largest_components) ))
V(author_net_largecomponents)$size <- 1.5
E(author_net_largecomponents)$color <- rgb(.5, .5, .5, .10)
plot(author_net_largecomponents, layout = layout_with_fr, margin = 0,
edge.width = 1)
# 4a. look at nodes in component with at least 2 pubs:
author_net_2pubs <- delete_vertices(author_net,
which( V(author_net)$multi_pubs) )
set.seed(10)
layout3 <- layout_with_fr(author_net_2pubs)
pdf(paste("2021.09.23 author_author_network_2pubs_in_component.pdf"))
plot(author_net_2pubs, layout = layout3, margin = 0,
edge.width = 1,
main = "Author-Author Network, Components with >= 2 Publications")
legend("bottomleft", col = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green),
legend = c("Academic", "Community", "Government", "Healthcare", "Professional", "School"),
pch = 21, pt.bg = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green, duke_blue),
pt.cex = 1, cex = .80)
dev.off()
## png
## 2
#############################################################
#alternative layout?
e <- get.edgelist(author_net_2pubs, names=FALSE)
#default:
#l <- qgraph.layout.fruchtermanreingold(e, vcount=vcount(author_net_2pubs),
# area = 1*(vcount(author_net_2pubs)^2),
# repulse.rad = (vcount(author_net_2pubs)^3.0))
#increasing area a bit:
l <- qgraph.layout.fruchtermanreingold(e, vcount=vcount(author_net_2pubs),
area = 2.7*(vcount(author_net_2pubs)^2),
repulse.rad = (vcount(author_net_2pubs)^3.0))
pdf(paste("2021.09.23 author_author_network_2pubs_in_component_alt_layout.pdf"))
plot(author_net_2pubs, layout = l, margin = 0,
edge.width = 1,
main = "Author-Author Network, Components with >= 2 Publications")
legend("bottomright", col = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green),
legend = c("Academic", "Community", "Government", "Healthcare", "Professional", "School"),
pch = 21, pt.bg = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green, duke_blue),
pt.cex = 1, cex = .80)
dev.off()
## png
## 2
#############################################################
# 4b. look at nodes in component with at least 2 pubs (one color for nodes):
pdf(paste("2021.09.23 author_author_network_2pubs_in_component_one_color.pdf"))
plot(author_net_2pubs, layout = l, margin = 0,
edge.width = 1,
main = "Author-Author Network, Components with >= 2 Publications",
vertex.color = unc_blue, vertex.frame.color = unc_blue)
dev.off()
## png
## 2
#############################################################
# 5. plot all nodes with alternative layout:
e <- get.edgelist(author_net, names=FALSE)
l2 <- qgraph.layout.fruchtermanreingold(e, vcount=vcount(author_net),
area = 2.7*(vcount(author_net)^2),
repulse.rad = (vcount(author_net)^3.0))
pdf(paste("2021.09.23 author_author_network_alt_layout.pdf"))
plot(author_net, layout = l2, margin = 0,
edge.width = 1,
main = "Author-Author Network, Colored by Organization")
legend("bottomleft", col = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green),
legend = c("Academic", "Community", "Government", "Healthcare", "Professional", "School"),
pch = 21, pt.bg = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green, duke_blue),
pt.cex = 1, cex = .80)
dev.off()
## png
## 2
#############################################################
#same thing but one color
pdf(paste("2021.09.23 author_author_network_alt_layout_one_color.pdf"))
plot(author_net, layout = l2, margin = 0,
edge.width = 1,
main = "Author-Author Network, Colored by Organization",
vertex.color = unc_blue, vertex.frame.color = unc_blue)
dev.off()
## png
## 2
#############################################################
# 6. Just the biggest component?
biggest_comp <- which(components$csize == max(components$csize))
author_net_maincomponent <- delete_vertices(author_net,
which( V(author_net)$comp != biggest_comp) )
V(author_net_maincomponent)$size <- 1.5
E(author_net_maincomponent)$color <- rgb(.5, .5, .5, .10)
set.seed(113)
layout4 <- layout_with_fr(author_net_maincomponent)
pdf(paste("2021.09.23 author_author_network_largest_component.pdf"))
plot(author_net_maincomponent, layout = layout4, margin = 0,
edge.width = 1, main = "Author-Author Network, Largest Component")
legend("bottomleft", col = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green),
legend = c("Academic", "Community", "Government", "Healthcare", "Professional", "School"),
pch = 21, pt.bg = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green, duke_blue),
pt.cex = 1, cex = .80)
dev.off()
## png
## 2
#############################################################
# 7. Include just repeat interactions, so coauthors at least twice as edge, remove all others
# keep those edges that have weights>1:
author_net_repeat <- delete_edges(author_net, which(E(author_net)$weight<2))
# delete any authors with no ties:
author_net_repeat <- delete_vertices(author_net_repeat,
which(degree(author_net_repeat)<1))
# plot:
#E(author_net_repeat)$color <- rgb(.5, .5, .5, .10)
e <- get.edgelist(author_net_repeat, names=FALSE)
#default:
#l <- qgraph.layout.fruchtermanreingold(e, vcount=vcount(author_net_2pubs),
# area = 1*(vcount(author_net_2pubs)^2),
# repulse.rad = (vcount(author_net_2pubs)^3.0))
#increasing area a bit:
l3 <- qgraph.layout.fruchtermanreingold(e, vcount=vcount(author_net_repeat),
area = 2.7*(vcount(author_net_repeat)^2),
repulse.rad = (vcount(author_net_repeat)^3.0))
E(author_net_repeat)$color = "light gray"
V(author_net_repeat)$size = 2
pdf(paste("2021.09.23 author_author_network_repeat_collaborations.pdf"))
plot(author_net_repeat , margin = 0,
edge.width = 1, layout = l3)
legend("bottomleft", col = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green),
legend = c("Academic", "Community", "Government", "Healthcare", "Professional", "School"),
pch = 21, pt.bg = c(unc_blue, orange, dark_gray, eggplant, teal, spring_green, duke_blue),
pt.cex = 1, cex = .80)
dev.off()
## png
## 2
#############################################################
#8. plot author-author network, by publications:
author_net
## IGRAPH 78b4f22 UNW- 179 1249 --
## + attr: name (v/c), final_type (v/c), title (v/c), label (v/l), size
## | (v/n), color (v/c), frame.color (v/c), vertex.label (v/c), multi_pubs
## | (v/l), comp (v/n), weight (e/n), color (e/c)
## + edges from 78b4f22 (vertex names):
## [1] 57220026426--35452136000 57220026426--57220023616 57220026426--57220028740
## [4] 57220026426--57220036794 57220026426--57220038353 57220026426--57201635827
## [7] 35452136000--57220023616 35452136000--57220028740 35452136000--57220036794
## [10] 35452136000--57220038353 35452136000--57201635827 35452136000--36850389900
## [13] 35452136000--7006733030 35452136000--57226121039 35452136000--35737232200
## [16] 35452136000--57226101415 35452136000--7201375470 35452136000--57198326551
## + ... omitted several edges
edges_coauthor_net <- as_edgelist(coauthorship_net)
relabel_function <- function(edges_coauthor_net, ids){
lab <- paste(edges_coauthor_net[edges_coauthor_net[,1] == ids,2],
collapse = "_")
}
paper_ids <- unlist(lapply(V(author_net)$name, relabel_function,
edges_coauthor_net=edges_coauthor_net))
paper_ids_factor <- factor(paper_ids, labels=1:length(unique(paper_ids )))
V(author_net)$paper_ids_factor <- paper_ids_factor
paper_ids_dat <- tibble(paper_ids=paper_ids,
paper_ids_factor= paper_ids_factor )
pdf(paste("2021.09.23 author_network_with_pub_labels.pdf"))
plot(author_net, layout = l2, margin = 0,
edge.width = 1,
main = "Author-Author Network, lables",
vertex.color = unc_blue, vertex.frame.color = unc_blue,
vertex.label = paper_ids_factor, vertex.label.cex= .75)
dev.off()
## png
## 2
#############################################################
#paper_ids_dat %>% filter(paper_ids_factor == 30) %>% select(paper_ids)
#pubs %>% filter(pmid=="34010526") %>% select("title")