R Markdown

更多细节请参考 tidytext、quanteda、jiebaR 的官方教程以及课程分享链接。

library(jiebaR)
## Warning: package 'jiebaR' was built under R version 4.0.3
## Loading required package: jiebaRD
## Warning: package 'jiebaRD' was built under R version 4.0.3
library(quanteda)
## Warning: package 'quanteda' was built under R version 4.0.3
## Package version: 2.1.2
## Parallel computing: 2 of 8 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
## 
##     View
library(readtext)
## Warning: package 'readtext' was built under R version 4.0.3
library(tidyverse)
## -- Attaching packages ------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2     √ purrr   0.3.4
## √ tibble  3.0.3     √ dplyr   1.0.2
## √ tidyr   1.1.2     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## -- Conflicts ---------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read every .txt file from the working directory. The file names look like
# "<PM>_<Year>.txt", so the two underscore-separated parts are stored as the
# document variables PM and Year.
# NOTE(review): setwd() with a hard-coded path ties the script to one
# machine; kept as-is because later code may rely on the working directory.
setwd("E:/course/zf")
dat_txtmultiple2 <- readtext(
  "*.txt",
  docvarsfrom = "filenames",
  docvarnames = c("PM", "Year"),
  encoding = "UTF-8"
)

# Build the corpus once and reuse it for the summary instead of constructing
# a second, identical corpus just to print it.
corp <- corpus(dat_txtmultiple2)
summary(corp, 5)
## Corpus consisting of 17 documents, showing 5 documents:
## 
##             Text Types Tokens Sentences     PM Year
##  华国锋_1978.txt  2965  19119       659 华国锋 1978
##  华国锋_1979.txt  2655  17330       504 华国锋 1979
##  华国锋_1980.txt  1454   8376       342 华国锋 1980
##  赵紫阳_1981.txt  2845  20135       671 赵紫阳 1981
##  赵紫阳_1982.txt  2728  18480       586 赵紫阳 1982
# Chinese stop-word list taken from the "misc" source.
ch_stop <- stopwords("zh", source = "misc")

# Tokenize the corpus without punctuation, then drop the stop words.
ch_toks <- tokens(corp, remove_punct = TRUE)
ch_toks <- tokens_remove(ch_toks, pattern = ch_stop)

# Build the document-feature matrix and list its top features.
ch_dfm <- dfm(ch_toks)
topfeatures(ch_dfm)
## 主义 经济 人民 建设 发展 社会 国家 生产 我国 工业 
## 1817 1562 1560 1518 1458 1456 1405 1338 1155 1143
# Frequency table for the features of the dfm.
features_dfm_inaug <- textstat_frequency(ch_dfm)

# Word cloud limited to features with a count of at least 100 and to at most
# 100 words, drawn unrotated, in non-random order, with the Dark2 palette.
textplot_wordcloud(
  ch_dfm,
  min_count = 100,
  max_words = 100,
  min_size = 0.5,
  max_size = 2.8,
  rotation = 0,
  random_order = FALSE,
  color = RColorBrewer::brewer.pal(8, "Dark2")
)

# Word-frequency statistics and plot: a dot plot of the 10 most frequent
# features, ordered by frequency, with the axes flipped so the feature
# labels are listed vertically.
ggplot(
  data = textstat_frequency(ch_dfm, n = 10),
  mapping = aes(x = reorder(feature, frequency), y = frequency)
) +
  geom_point() +
  coord_flip() +
  labs(x = NULL, y = "Frequency") +
  theme_minimal()

#词云
library(wordcloud2)
## Warning: package 'wordcloud2' was built under R version 4.0.3
# Demo cloud from demoFreq (not defined in this file; presumably the sample
# data set that ships with wordcloud2).
wordcloud2(demoFreq)

# Keep only the features whose frequency exceeds 400. The trailing comma is
# required: without it a data frame is indexed by column, not by row, so the
# logical vector would not act as a row filter.
ff <- features_dfm_inaug[features_dfm_inaug$frequency > 400, ]

wordcloud2(demoFreq, size = 1, shape = "pentagon")
wordcloud2(ff, size = 1, shape = "star")
# For tuning the appearance of the plots, see the wordcloud2 help pages.

# jiebaR: create a segmentation worker with its default settings and use it
# to segment a sample passage (the line breaks are part of the string).

tt <- worker()
sample_passage <- "宝玉听了,喜不自禁,笑道:“待我放下书,帮你来收拾。”黛玉道:“什么
书?”宝玉见问,慌的藏了,便说道:“不过是《中庸》《大学》。”黛玉道:“你
又在我跟前弄鬼。趁早儿给我瞧瞧,好多着呢!”宝玉道:“妹妹,要论你我是不
怕的,你看了好歹别告诉人。真是好文章!你要看了,连饭也不想吃呢!”一面说,
一面递过去。黛玉把花具放下,接书来瞧,从头看去,越看越爱,不顿饭时,已看
了好几出了。但觉词句警人,馀香满口。一面看了,只管出神,心内还默默记诵。
宝玉笑道:“妹妹,你说好不好?”黛玉笑着点头儿。宝玉笑道:“我就是个‘多
愁多病的身’,你就是那‘倾国倾城的貌’。”黛玉听了,不觉带腮连耳的通红了,
登时竖起两道似蹙非蹙的眉,瞪了一双似睁非睁的眼,桃腮带怒,薄面含嗔,指着
宝玉道:“你这该死的,胡说了!好好儿的,把这些淫词艳曲弄了来,说这些混帐
话,欺负我。我告诉舅舅、舅母去!”说到“欺负”二字,就把眼圈儿红了,转身
就走。宝玉急了,忙向前拦住道:“好妹妹,千万饶我这一遭儿罢!要有心欺负你,
明儿我掉在池子里,叫个癞头鼋吃了去,变个大忘八,等你明儿做了‘一品夫人’
病老归西的时候儿,我往你坟上替你驼一辈子碑去。”说的黛玉“扑嗤”的一声笑
了,一面揉着眼,一面笑道:“一般唬的这么个样儿,还只管胡说。呸!原来也是
个‘银样蜡枪头’。”宝玉听了,笑道:“你说说,你这个呢?我也告诉去。”黛
玉笑道:“你说你会‘过目成诵’,难道我就不能‘一目十行’了?”宝玉一面收
书,一面笑道:“正经快把花儿埋了罢,别提那些个了。”二人便收拾落花。"
segment(sample_passage, tt)
##   [1] "宝玉"     "听"       "了"       "喜不自禁" "笑道"     "待"      
##   [7] "我"       "放下"     "书"       "帮"       "你"       "来"      
##  [13] "收拾"     "黛"       "玉"       "道"       "什么"     "书"      
##  [19] "宝玉"     "见"       "问"       "慌"       "的"       "藏"      
##  [25] "了"       "便"       "说道"     "不过"     "是"       "中庸"    
##  [31] "大学"     "黛"       "玉"       "道"       "你"       "又"      
##  [37] "在"       "我"       "跟前"     "弄鬼"     "趁早"     "儿"      
##  [43] "给"       "我"       "瞧瞧"     "好"       "多着呢"   "宝玉"    
##  [49] "道"       "妹妹"     "要论"     "你"       "我"       "是"      
##  [55] "不"       "怕"       "的"       "你"       "看"       "了"      
##  [61] "好歹"     "别"       "告诉"     "人"       "真是"     "好"      
##  [67] "文章"     "你"       "要"       "看"       "了"       "连饭"    
##  [73] "也"       "不想"     "吃呢"     "一面"     "说"       "一面"    
##  [79] "递过去"   "黛玉"     "把"       "花具"     "放下"     "接书来"  
##  [85] "瞧"       "从头"     "看去"     "越"       "看"       "越"      
##  [91] "爱"       "不"       "顿饭"     "时"       "已"       "看"      
##  [97] "了"       "好几"     "出"       "了"       "但觉"     "词句"    
## [103] "警人"     "馀"       "香"       "满口"     "一面"     "看"      
## [109] "了"       "只管"     "出神"     "心内"     "还"       "默默"    
## [115] "记诵"     "宝玉"     "笑道"     "妹妹"     "你"       "说"      
## [121] "好不好"   "黛"       "玉"       "笑"       "着"       "点头"    
## [127] "儿"       "宝玉"     "笑道"     "我"       "就是"     "个"      
## [133] "多"       "愁多"     "病的身"   "你"       "就是"     "那"      
## [139] "倾国倾城" "的貌"     "黛"       "玉"       "听"       "了"      
## [145] "不觉"     "带"       "腮"       "连"       "耳"       "的"      
## [151] "通红"     "了"       "登时"     "竖起"     "两道"     "似"      
## [157] "蹙"       "非"       "蹙"       "的"       "眉"       "瞪"      
## [163] "了"       "一双"     "似睁非"   "睁"       "的"       "眼"      
## [169] "桃腮带"   "怒"       "薄面"     "含"       "嗔"       "指着"    
## [175] "宝玉"     "道"       "你"       "这"       "该死"     "的"      
## [181] "胡说"     "了"       "好好儿"   "的"       "把"       "这些"    
## [187] "淫词艳曲" "弄"       "了"       "来"       "说"       "这些"    
## [193] "混帐"     "话"       "欺负"     "我"       "我"       "告诉"    
## [199] "舅舅"     "舅母"     "去"       "说"       "到"       "欺负"    
## [205] "二"       "字"       "就"       "把"       "眼圈儿"   "红"      
## [211] "了"       "转身"     "就"       "走"       "宝玉"     "急"      
## [217] "了"       "忙"       "向前"     "拦住"     "道"       "好"      
## [223] "妹妹"     "千万"     "饶"       "我"       "这"       "一遭"    
## [229] "儿"       "罢"       "要"       "有心"     "欺负"     "你"      
## [235] "明儿"     "我"       "掉"       "在"       "池子"     "里"      
## [241] "叫个"     "癞头"     "鼋"       "吃"       "了"       "去"      
## [247] "变个"     "大忘八"   "等"       "你"       "明儿"     "做了"    
## [253] "一品夫人" "病老"     "归西"     "的"       "时候"     "儿"      
## [259] "我往"     "你"       "坟"       "上"       "替"       "你"      
## [265] "驼"       "一辈子"   "碑"       "去"       "说"       "的"      
## [271] "黛"       "玉"       "扑嗤"     "的"       "一声"     "笑"      
## [277] "了"       "一面"     "揉"       "着眼"     "一面"     "笑道"    
## [283] "一般"     "唬"       "的"       "这么"     "个"       "样儿"    
## [289] "还"       "只管"     "胡说"     "呸"       "原来"     "也"      
## [295] "是"       "个"       "银"       "样"       "蜡"       "枪头"    
## [301] "宝玉"     "听"       "了"       "笑道"     "你"       "说"      
## [307] "说"       "你"       "这个"     "呢"       "我"       "也"      
## [313] "告诉"     "去"       "黛"       "玉笑道"   "你"       "说"      
## [319] "你"       "会"       "过目成诵" "难道"     "我"       "就"      
## [325] "不能"     "一目十行" "了"       "宝玉"     "一面"     "收"      
## [331] "书"       "一面"     "笑道"     "正经"     "快"       "把"      
## [337] "花儿"     "埋"       "了"       "罢"       "别提"     "那些"    
## [343] "个"       "了"       "二人"     "便"       "收拾"     "落花"
# Register two four-character phrases as user words on the worker so that
# each one is kept together as a single token when segmenting.
newword <- c("似蹙非蹙", "似睁非睁")
new_user_word(worker = tt, words = newword)
## [1] TRUE
# Segment the same passage again with the worker that now knows the two user
# words; the line breaks are part of the string.
passage_with_user_words <- "宝玉听了,喜不自禁,笑道:“待我放下书,帮你来收拾。”黛玉道:“什么
书?”宝玉见问,慌的藏了,便说道:“不过是《中庸》《大学》。”黛玉道:“你
又在我跟前弄鬼。趁早儿给我瞧瞧,好多着呢!”宝玉道:“妹妹,要论你我是不
怕的,你看了好歹别告诉人。真是好文章!你要看了,连饭也不想吃呢!”一面说,
一面递过去。黛玉把花具放下,接书来瞧,从头看去,越看越爱,不顿饭时,已看
了好几出了。但觉词句警人,馀香满口。一面看了,只管出神,心内还默默记诵。
宝玉笑道:“妹妹,你说好不好?”黛玉笑着点头儿。宝玉笑道:“我就是个‘多
愁多病的身’,你就是那‘倾国倾城的貌’。”黛玉听了,不觉带腮连耳的通红了,
登时竖起两道似蹙非蹙的眉,瞪了一双似睁非睁的眼,桃腮带怒,薄面含嗔,指着
宝玉道:“你这该死的,胡说了!好好儿的,把这些淫词艳曲弄了来,说这些混帐
话,欺负我。我告诉舅舅、舅母去!”说到“欺负”二字,就把眼圈儿红了,转身
就走。宝玉急了,忙向前拦住道:“好妹妹,千万饶我这一遭儿罢!要有心欺负你,
明儿我掉在池子里,叫个癞头鼋吃了去,变个大忘八,等你明儿做了‘一品夫人’
病老归西的时候儿,我往你坟上替你驼一辈子碑去。”说的黛玉“扑嗤”的一声笑
了,一面揉着眼,一面笑道:“一般唬的这么个样儿,还只管胡说。呸!原来也是
个‘银样蜡枪头’。”宝玉听了,笑道:“你说说,你这个呢?我也告诉去。”黛
玉笑道:“你说你会‘过目成诵’,难道我就不能‘一目十行’了?”宝玉一面收
书,一面笑道:“正经快把花儿埋了罢,别提那些个了。”二人便收拾落花。"
segment(passage_with_user_words, tt)
##   [1] "宝玉"     "听"       "了"       "喜不自禁" "笑道"     "待"      
##   [7] "我"       "放下"     "书"       "帮"       "你"       "来"      
##  [13] "收拾"     "黛"       "玉"       "道"       "什么"     "书"      
##  [19] "宝玉"     "见"       "问"       "慌"       "的"       "藏"      
##  [25] "了"       "便"       "说道"     "不过"     "是"       "中庸"    
##  [31] "大学"     "黛"       "玉"       "道"       "你"       "又"      
##  [37] "在"       "我"       "跟前"     "弄鬼"     "趁早"     "儿"      
##  [43] "给"       "我"       "瞧瞧"     "好"       "多着呢"   "宝玉"    
##  [49] "道"       "妹妹"     "要论"     "你"       "我"       "是"      
##  [55] "不"       "怕"       "的"       "你"       "看"       "了"      
##  [61] "好歹"     "别"       "告诉"     "人"       "真是"     "好"      
##  [67] "文章"     "你"       "要"       "看"       "了"       "连饭"    
##  [73] "也"       "不想"     "吃呢"     "一面"     "说"       "一面"    
##  [79] "递过去"   "黛玉"     "把"       "花具"     "放下"     "接书来"  
##  [85] "瞧"       "从头"     "看去"     "越"       "看"       "越"      
##  [91] "爱"       "不"       "顿饭"     "时"       "已"       "看"      
##  [97] "了"       "好几"     "出"       "了"       "但觉"     "词句"    
## [103] "警人"     "馀"       "香"       "满口"     "一面"     "看"      
## [109] "了"       "只管"     "出神"     "心内"     "还"       "默默"    
## [115] "记诵"     "宝玉"     "笑道"     "妹妹"     "你"       "说"      
## [121] "好不好"   "黛"       "玉"       "笑"       "着"       "点头"    
## [127] "儿"       "宝玉"     "笑道"     "我"       "就是"     "个"      
## [133] "多"       "愁多"     "病的身"   "你"       "就是"     "那"      
## [139] "倾国倾城" "的貌"     "黛"       "玉"       "听"       "了"      
## [145] "不觉"     "带"       "腮"       "连"       "耳"       "的"      
## [151] "通红"     "了"       "登时"     "竖起"     "两道"     "似蹙非蹙"
## [157] "的"       "眉"       "瞪"       "了"       "一双"     "似睁非睁"
## [163] "的"       "眼"       "桃腮带"   "怒"       "薄面"     "含"      
## [169] "嗔"       "指着"     "宝玉"     "道"       "你"       "这"      
## [175] "该死"     "的"       "胡说"     "了"       "好好儿"   "的"      
## [181] "把"       "这些"     "淫词艳曲" "弄"       "了"       "来"      
## [187] "说"       "这些"     "混帐"     "话"       "欺负"     "我"      
## [193] "我"       "告诉"     "舅舅"     "舅母"     "去"       "说"      
## [199] "到"       "欺负"     "二"       "字"       "就"       "把"      
## [205] "眼圈儿"   "红"       "了"       "转身"     "就"       "走"      
## [211] "宝玉"     "急"       "了"       "忙"       "向前"     "拦住"    
## [217] "道"       "好"       "妹妹"     "千万"     "饶"       "我"      
## [223] "这"       "一遭"     "儿"       "罢"       "要"       "有心"    
## [229] "欺负"     "你"       "明儿"     "我"       "掉"       "在"      
## [235] "池子"     "里"       "叫个"     "癞头"     "鼋"       "吃"      
## [241] "了"       "去"       "变个"     "大忘八"   "等"       "你"      
## [247] "明儿"     "做了"     "一品夫人" "病老"     "归西"     "的"      
## [253] "时候"     "儿"       "我往"     "你"       "坟"       "上"      
## [259] "替"       "你"       "驼"       "一辈子"   "碑"       "去"      
## [265] "说"       "的"       "黛"       "玉"       "扑嗤"     "的"      
## [271] "一声"     "笑"       "了"       "一面"     "揉"       "着眼"    
## [277] "一面"     "笑道"     "一般"     "唬"       "的"       "这么"    
## [283] "个"       "样儿"     "还"       "只管"     "胡说"     "呸"      
## [289] "原来"     "也"       "是"       "个"       "银"       "样"      
## [295] "蜡"       "枪头"     "宝玉"     "听"       "了"       "笑道"    
## [301] "你"       "说"       "说"       "你"       "这个"     "呢"      
## [307] "我"       "也"       "告诉"     "去"       "黛"       "玉笑道"  
## [313] "你"       "说"       "你"       "会"       "过目成诵" "难道"    
## [319] "我"       "就"       "不能"     "一目十行" "了"       "宝玉"    
## [325] "一面"     "收"       "书"       "一面"     "笑道"     "正经"    
## [331] "快"       "把"       "花儿"     "埋"       "了"       "罢"      
## [337] "别提"     "那些"     "个"       "了"       "二人"     "便"      
## [343] "收拾"     "落花"
# Segment a short text that has no punctuation, only spaces between phrases.
lyrics_text <- "当赤道留住雪花眼泪融掉细沙你肯珍惜我吗 如浮云陪伴天马公演一个童话 当配乐遗下结他画布忘掉了画 请想起我如绿草当这地球没有花 "
segment(lyrics_text, tt)
##  [1] "当"   "赤道" "留住" "雪花" "眼泪" "融掉" "细沙" "你"   "肯"   "珍惜"
## [11] "我"   "吗"   "如"   "浮云" "陪伴" "天马" "公演" "一个" "童话" "当"  
## [21] "配乐" "遗下" "结他" "画布" "忘掉" "了"   "画"   "请"   "想起" "我"  
## [31] "如"   "绿草" "当"   "这"   "地球" "没有" "花"

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.