# ¿Cómo han cambiado los tópicos de interés de las áreas de conocimiento durante cierto período de tiempo?
#Librerías
library(quanteda)
library(tidyr)
library(dplyr)
library(tidyverse)
library(quanteda.textstats)
library(quanteda.textplots)
library(pluralize)
# Load the two inputs: the UV publication export (comma-separated) and the
# IEEE thesaurus (semicolon-separated).
datos_uv <- read.csv("C:/Users/ureus/Downloads/ICB_TM - BD UV.csv", sep = ",")
tesauro <- read.csv("C:/Users/ureus/Downloads/IEEE_tesauro.csv", sep = ";")

# Dataset cleaning ----
# Lower-case the raw keyword string before it is split into single keywords.
datos_uv$Index.keywords <- tolower(datos_uv$Index.keywords)

# Normalise the thesaurus terms: trimmed, spaces replaced by "_", lower case.
tesauro$Area2 <- str_trim(tesauro$Area2)
tesauro$Area2 <- gsub(" ", "_", tesauro$Area2)
tesauro$Area2 <- tolower(tesauro$Area2)

# The keywords are singularised later in this script and then joined to
# `Area2` by exact equality. Without the same step here, a plural thesaurus
# term could never match its singularised keyword.
# NOTE(review): if the thesaurus lists both the singular and the plural form
# of a term, they now collapse to the same key and the join will duplicate
# the matching keyword rows - confirm against the thesaurus file.
tiene_termino <- !is.na(tesauro$Area2)
if (any(tiene_termino)) {
  tesauro$Area2[tiene_termino] <- singularize(tesauro$Area2[tiene_termino])
}
# Dataset reshaping ----
# Split the ";"-separated keyword string so that every keyword gets its own
# row, then normalise each keyword: strip surrounding whitespace, join
# multi-word keywords with "_" and reduce them to their singular form.
datos_uv <- datos_uv %>%
  mutate(Index.keywords = strsplit(as.character(Index.keywords), ";")) %>%
  unnest(Index.keywords) %>%
  mutate(
    Index.keywords = singularize(gsub(" ", "_", str_trim(Index.keywords)))
  )
# Remove unnecessary words ----
# Domain-specific terms to discard, written in plain readable form.
# At this point the keywords are already lower-cased, trimmed, joined with
# "_" and singularised, and the filter below matches by exact equality.
# A stop term with a leading space, an inner space or a capital letter would
# therefore never remove anything, so the list is normalised the same way
# instead of relying on hand-typed variants.
terminos_descartados <- c(
  "human", "humans", "normal human", "human experiment", "nonhuman",
  "female", "male",
  "adult", "young adult", "middle age", "middle aged", "aged",
  "older adults", "adolescent", "adolescents", "child", "infant",
  "review", "article", "clinical article", "priority journal",
  "prospective study", "controlled study",
  "animal", "animals", "animal experiment", "rat", "wistar rat",
  "chile", "chilean", "non-intrusive"
)
terminos_descartados <- gsub(" ", "_", str_trim(tolower(terminos_descartados)))

# English stop words plus the domain terms, both as written and in singular
# form (the keywords were singularised, e.g. "older_adults" -> "older_adult").
palabras_innecesarias <- unique(c(
  tm::stopwords(kind = "en"),
  terminos_descartados,
  singularize(terminos_descartados)
))

# Drop the unwanted keywords and keep only the rows whose `SI` column
# equals "SI".
datos_uv <- datos_uv %>%
  filter(!Index.keywords %in% palabras_innecesarias) %>%
  filter(SI == "SI")
# Corpus creation ----
# One document per keyword row; the remaining columns become docvars.
# NOTE(review): `corpus_datos_uv` and `toks_datos_uv` are not referenced
# again in the visible part of this script - confirm whether later code
# still needs them.
corpus_datos_uv <- corpus(datos_uv, text_field = "Index.keywords")

# Tokenisation ----
# Strip punctuation, symbols, numbers, URLs and separators, then convert
# every token to lower case.
toks_datos_uv <- corpus_datos_uv %>%
  tokens(
    remove_punct = TRUE,
    remove_symbols = TRUE,
    remove_numbers = TRUE,
    remove_url = TRUE,
    remove_separators = TRUE
  ) %>%
  tokens_tolower()
# Keep only the keyword rows whose keyword matches a thesaurus term
# (`Area2`); the thesaurus columns are attached to every matching row.
uv_datos <- inner_join(
  datos_uv,
  tesauro,
  by = c("Index.keywords" = "Area2")
)

# Build the document-feature matrix ----
# Despite its name, `corpus_uv` ends up holding the tokens object built
# from the corpus, not the corpus itself.
corpus_uv <- uv_datos %>%
  corpus(text_field = "Index.keywords") %>%
  tokens()
dfmat_datos_uv <- dfm(corpus_uv)
# Plot ----
# Settings for the keyness analysis:
#   ano       - reference year
#   acumulado - FALSE: the target group is the documents of year `ano`;
#               TRUE:  the target group is every document up to `ano`.
ano <- "2021"
acumulado <- FALSE

# Compare years as numbers. `ano` is a string, so comparing it directly
# against the `Año` docvar would be a character comparison, which only
# orders years correctly while every value has the same number of digits.
ano_objetivo <- as.numeric(ano)
ano_documentos <- as.numeric(as.character(dfmat_datos_uv$Año))
if (length(ano_documentos) != ndoc(dfmat_datos_uv)) {
  stop("Document variable 'Año' was not found in dfmat_datos_uv.",
       call. = FALSE)
}

if (acumulado) {
  es_objetivo <- ano_documentos <= ano_objetivo
} else {
  es_objetivo <- ano_documentos == ano_objetivo
}
# Documents without a usable year cannot belong to the target group.
es_objetivo[is.na(es_objetivo)] <- FALSE

# Keyness compares a target group against a reference group, so both
# groups must contain at least one document.
if (!any(es_objetivo) || all(es_objetivo)) {
  stop("The selection for year ", ano, " leaves the target or the ",
       "reference group empty; choose another year.", call. = FALSE)
}

tstat_key <- textstat_keyness(dfmat_datos_uv, target = es_objetivo)

# Show the 10 strongest features on each side: target in aquamarine,
# reference in gray.
grafico_area <- textplot_keyness(
  tstat_key,
  labelsize = 4,
  n = 10,
  margin = 0.6,
  color = c("mediumaquamarine", "gray")
)
plot(grafico_area)