Análisis de las palabras claves de artículos científicos en el área biomédica (UV, USACH, UdeC)

Universidad de Valparaíso

Packages

library(dplyr)
library(tidytext)
library(tidyr)
library(wordcloud2)
library(tm)

Lectura de datos

# Read the UV database export (comma-separated, first row holds column names).
data_uv <- read.csv(
  file = "ICB_TM - BD UV.csv",
  header = TRUE,
  sep = ","
)

Limpieza de datos

# Pull out the author names and the lower-cased index keywords.
autores_uv <- data_uv$Nombre
keywords_uv <- as.character(tolower(data_uv$Index.keywords))
# Build a new data frame holding only the columns needed for the analysis.
# NOTE(review): the third column is unnamed, so data.frame() derives its name
# from the expression. It also reads `SI`, whereas the UdeC and USACH sections
# read `Filiación_*` columns -- confirm this is the intended column.
data_tidy <- data.frame(autores_uv, keywords_uv, data_uv$SI == "SI")
# Keywords are delimited by ";": split them into one keyword per row. The
# pieces keep the blank that follows each ";", so trim them; otherwise "x" and
# " x" are counted as two different keywords. Missing or empty keywords carry
# no information and are dropped.
data_tidy <- data_tidy %>%
  mutate(keywords_uv = strsplit(as.character(keywords_uv), ";")) %>%
  unnest(keywords_uv) %>%
  mutate(keywords_uv = trimws(keywords_uv)) %>%
  filter(!is.na(keywords_uv), keywords_uv != "")

# Remove keywords that are not wanted in the analysis: English stop words plus
# generic indexing terms. Keywords are already trimmed, so no leading-space
# variants are needed here.
palabras_innecesarias <- c(
  tm::stopwords(kind = "en"),
  "human", "humans", "female", "male",
  "adolescent", "adolescents", "young adult", "adult", "middle aged", "aged",
  "review", "article", "clinical article", "prospective study",
  "controlled study", "priority journal"
)
data_tidy <- data_tidy %>%
  filter(!keywords_uv %in% palabras_innecesarias)

Nube de palabras

# Word cloud of the 200 most frequent UV keywords (ties at the cutoff are kept).
# `wt = n` makes the ranking column explicit instead of relying on top_n()'s
# implicit choice of the last column; `TRUE` replaces the reassignable `T`.
data_tidy %>%
  count(keywords_uv, sort = TRUE) %>%
  top_n(200, wt = n) %>%
  wordcloud2(size = 0.5)

Universidad de Concepción

Packages

library(dplyr)
library(tidytext)
library(tidyr)
library(wordcloud2)
library(tm)

Lectura de datos

# Read the UdeC database export (comma-separated, first row holds column names).
data_udec <- read.csv(
  file = "ICB_TM - BD UdeC.csv",
  header = TRUE,
  sep = ","
)

Limpieza de datos

# Pull out the author names and the lower-cased index keywords.
autores_udec <- data_udec$Nombre
keywords_udec <- as.character(tolower(data_udec$Index.keywords))
# Build a new data frame holding only the columns needed for the analysis.
# NOTE(review): the third column is unnamed, so data.frame() derives its name
# from the expression.
data_tidy_udec <- data.frame(
  autores_udec,
  keywords_udec,
  data_udec$Filiación_UdeC == "SI"
)

# Keywords are delimited by ";": split them into one keyword per row. The
# pieces keep the blank that follows each ";", so trim them; otherwise "x" and
# " x" are counted as two different keywords. Missing or empty keywords carry
# no information and are dropped.
data_tidy_udec <- data_tidy_udec %>%
  mutate(keywords_udec = strsplit(as.character(keywords_udec), ";")) %>%
  unnest(keywords_udec) %>%
  mutate(keywords_udec = trimws(keywords_udec)) %>%
  filter(!is.na(keywords_udec), keywords_udec != "")

# Remove keywords that are not wanted in the analysis: English stop words plus
# generic indexing terms. Keywords are already trimmed, so no leading-space
# variants are needed here.
palabras_innecesarias <- c(
  tm::stopwords(kind = "en"),
  "human", "humans", "female", "male",
  "adolescent", "adolescents", "young adult", "adult", "middle aged", "aged",
  "review", "article", "clinical article", "prospective study",
  "controlled study", "priority journal"
)
data_tidy_udec <- data_tidy_udec %>%
  filter(!keywords_udec %in% palabras_innecesarias)

Nube de palabras

# Word cloud of the 200 most frequent UdeC keywords (ties at the cutoff are
# kept). `wt = n` makes the ranking column explicit instead of relying on
# top_n()'s implicit choice of the last column; `TRUE` replaces the
# reassignable `T`.
data_tidy_udec %>%
  count(keywords_udec, sort = TRUE) %>%
  top_n(200, wt = n) %>%
  wordcloud2(size = 0.5)

Universidad de Santiago

Packages

library(dplyr)
library(tidytext)
library(tidyr)
library(wordcloud2)
library(tm)

Lectura de datos

# Read the USACH database export (comma-separated, first row holds column
# names).
data_usach <- read.csv(
  file = "ICB_TM - BD USACH.csv",
  header = TRUE,
  sep = ","
)

Limpieza de datos

# Pull out the author names and the lower-cased index keywords.
autores_usach <- data_usach$Nombre
keywords_usach <- as.character(tolower(data_usach$Index.keywords))
# Build a new data frame holding only the columns needed for the analysis.
# NOTE(review): the third column is unnamed, so data.frame() derives its name
# from the expression.
data_tidy_usach <- data.frame(
  autores_usach,
  keywords_usach,
  data_usach$Filiación_USACH == "SI"
)

# Keywords are delimited by ";": split them into one keyword per row. The
# pieces keep the blank that follows each ";", so trim them; otherwise "x" and
# " x" are counted as two different keywords. Missing or empty keywords carry
# no information and are dropped.
data_tidy_usach <- data_tidy_usach %>%
  mutate(keywords_usach = strsplit(as.character(keywords_usach), ";")) %>%
  unnest(keywords_usach) %>%
  mutate(keywords_usach = trimws(keywords_usach)) %>%
  filter(!is.na(keywords_usach), keywords_usach != "")

# Remove keywords that are not wanted in the analysis: English stop words,
# generic indexing terms, and the extra location/animal terms excluded for this
# data set. Keywords are already trimmed, so no leading-space variants are
# needed here.
palabras_innecesarias <- c(
  tm::stopwords(kind = "en"),
  "human", "humans", "female", "male",
  "adolescent", "adolescents", "young adult", "adult", "middle aged", "aged",
  "review", "article", "clinical article", "prospective study",
  "controlled study", "priority journal",
  "chile", "sheep", "mice", "mouse", "animal", "animals", "animal experiment"
)
data_tidy_usach <- data_tidy_usach %>%
  filter(!keywords_usach %in% palabras_innecesarias)

Nube de palabras

# Word cloud of the 200 most frequent USACH keywords (ties at the cutoff are
# kept). `wt = n` makes the ranking column explicit instead of relying on
# top_n()'s implicit choice of the last column; `TRUE` replaces the
# reassignable `T`.
data_tidy_usach %>%
  count(keywords_usach, sort = TRUE) %>%
  top_n(200, wt = n) %>%
  wordcloud2(size = 0.5)