# Packages: install any that are missing, then attach every one of them.
Packages <- c(
  "tidyverse",
  "readxl",
  "psych",
  "dplyr",
  "tm",
  "data.table",
  "ggplot2",
  "extrafont",
  "slam",
  "wordcloud"
)
for (p in Packages) {
  # requireNamespace() only checks that the package is installed; it does not
  # attach it. The package is then attached exactly once by library(), which
  # stops with an error if the installation did not succeed.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
## Loading required package: tidyverse
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## Loading required package: readxl
## Loading required package: psych
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## Loading required package: tm
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
## Loading required package: extrafont
## Registering fonts with R
## Loading required package: slam
##
## Attaching package: 'slam'
## The following object is masked from 'package:data.table':
##
## rollup
## Loading required package: wordcloud
## Loading required package: RColorBrewer
# System
# Fix the random seed so that later random steps are repeatable.
set.seed(12345)
# Print operating-system, host and user details of the machine running this.
Sys.info()
## sysname
## "Darwin"
## release
## "18.7.0"
## version
## "Darwin Kernel Version 18.7.0: Thu Jun 20 18:42:21 PDT 2019; root:xnu-4903.270.47~4/RELEASE_X86_64"
## nodename
## "chadlab"
## machine
## "x86_64"
## login
## "chadchae"
## user
## "chadchae"
## effective_user
## "chadchae"
# Print the R version, platform, locale and attached/loaded packages.
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] wordcloud_2.6 RColorBrewer_1.1-2 slam_0.1-45
## [4] extrafont_0.17 data.table_1.12.2 tm_0.7-6
## [7] NLP_0.2-0 psych_1.8.12 readxl_1.3.1
## [10] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.3
## [13] purrr_0.3.2 readr_1.3.1 tidyr_0.8.3
## [16] tibble_2.1.3 ggplot2_3.2.0 tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] tidyselect_0.2.5 xfun_0.8 haven_2.1.1 lattice_0.20-38
## [5] colorspace_1.4-1 generics_0.0.2 vctrs_0.2.0 htmltools_0.3.6
## [9] yaml_2.2.0 rlang_0.4.0 pillar_1.4.2 foreign_0.8-71
## [13] glue_1.3.1.9000 withr_2.1.2 modelr_0.1.4 munsell_0.5.0
## [17] gtable_0.3.0 cellranger_1.1.0 rvest_0.3.4 evaluate_0.14
## [21] knitr_1.23 parallel_3.6.0 Rttf2pt1_1.3.7 broom_0.5.2
## [25] Rcpp_1.0.1 scales_1.0.0 backports_1.1.4 jsonlite_1.6
## [29] mnormt_1.5-5 hms_0.5.0 digest_0.6.20 stringi_1.4.3
## [33] grid_3.6.0 cli_1.1.0 tools_3.6.0 magrittr_1.5
## [37] lazyeval_0.2.2 extrafontdb_1.0 crayon_1.3.4 pkgconfig_2.0.2
## [41] zeallot_0.1.0 xml2_1.2.0 lubridate_1.7.4 assertthat_0.2.1
## [45] rmarkdown_1.14 httr_1.4.0 rstudioapi_0.10 R6_2.4.0
## [49] nlme_3.1-140 compiler_3.6.0
# Print the contents of the `Packages` vector.
Packages
## [1] "tidyverse" "readxl" "psych" "dplyr" "tm"
## [6] "data.table" "ggplot2" "extrafont" "slam" "wordcloud"
# Print the current date and time.
Sys.time()
## [1] "2019-07-24 17:16:17 EDT"
# Korean font settings
# Sys.setlocale("LC_CTYPE", "ko_KR.UTF-8")
# List the fonts known to extrafont.
fonts()
## NULL
# Use AppleMyungjo for base graphics and as the default ggplot2 theme font.
par(family = "AppleMyungjo")
theme_set(theme_gray(base_family = "AppleMyungjo"))
# Data Load
# In the original keyword data, whitespace was replaced with "ㅁ"; the
# analysis dataset stores the keywords transposed sideways. For a word network
# that is not based on author-provided keywords, leave whitespace as it is or
# replace it with "_".
# NOTE(review): the recorded output shows "ㅎ" between words, not "ㅁ" --
# confirm which placeholder the spreadsheet really uses.
data <- read_excel("./DATA/datadata.xlsx")
data <- as.data.table(data)
head(data)
# Treat empty cells as missing.
data[data == ""] <- NA
head(data)

# This part builds the term-document matrices.
# Each column of `data` is one document (as in the original call, which passed
# `data` straight to VectorSource()). Passing the table directly makes tm
# coerce every column with as.character(), which deparses it into text such as
# c("a", NA, NA); that is where junk terms like `na,`, `na)` and `c("...",`
# came from. Build one plain string per column from its non-missing keywords
# instead. Any recorded output further down predates this fix.
keyword_docs <- unname(vapply(
  data,
  function(keywords) {
    keywords <- as.character(keywords)
    paste(keywords[!is.na(keywords) & nzchar(keywords)], collapse = " ")
  },
  character(1)
))
keyword_corpus <- Corpus(VectorSource(keyword_docs))
tdm <- TermDocumentMatrix(keyword_corpus)
dtm <- DocumentTermMatrix(keyword_corpus)
# Matrix dimensions: tdm is terms x documents, dtm is documents x terms.
dim(tdm)
## [1] 88 37
dim(dtm)
## [1] 37 88
# Print a summary and a sample of the document-term matrix.
inspect(dtm)
## <<DocumentTermMatrix (documents: 37, terms: 88)>>
## Non-/sparse entries: 232/3024
## Sparsity : 93%
## Maximal term length: 51
## Weighting : term frequency (tf)
## Sample :
## Terms
## Docs "agoraphobiaㅎㅍoutㅎhistoryㅎofㅎpanicㅎdisorder",
## 1 0
## 10 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 0
## Terms
## Docs "generalisedㅎanxietyㅎdisorder", "nonㅇclinicalㅎsampleㅎㅎ",
## 1 0 0
## 10 0 1
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 1
## 7 0 0
## 8 0 0
## 9 0 0
## Terms
## Docs "panicㅎdisorder", "posttraumaticㅎstressㅎdisorder",
## 1 0 0
## 10 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 1 1
## 7 0 0
## 8 0 0
## 9 0 0
## Terms
## Docs "socialㅎanxietyㅎdisorder", "specificㅎphobia",
## 1 0 0
## 10 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 0 0
## Terms
## Docs c("majorㅎdepressiveㅎdisorder", na, na)
## 1 0 12 1
## 10 0 13 1
## 2 0 15 1
## 3 0 17 1
## 4 0 17 1
## 5 0 17 1
## 6 1 14 1
## 7 0 13 1
## 8 0 16 1
## 9 0 15 1
# Print a summary and a sample of the term-document matrix.
inspect(tdm)
## <<TermDocumentMatrix (terms: 88, documents: 37)>>
## Non-/sparse entries: 232/3024
## Sparsity : 93%
## Maximal term length: 51
## Weighting : term frequency (tf)
## Sample :
## Docs
## Terms 1 10 2 3 4 5 6
## "agoraphobiaㅎㅍoutㅎhistoryㅎofㅎpanicㅎdisorder", 0 0 0 0 0 0 0
## "generalisedㅎanxietyㅎdisorder", 0 0 0 0 0 0 0
## "nonㅇclinicalㅎsampleㅎㅎ", 0 1 0 0 0 0 1
## "panicㅎdisorder", 0 0 0 0 0 0 1
## "posttraumaticㅎstressㅎdisorder", 0 0 0 0 0 0 1
## "socialㅎanxietyㅎdisorder", 0 0 0 0 0 0 0
## "specificㅎphobia", 0 0 0 0 0 0 0
## c("majorㅎdepressiveㅎdisorder", 0 0 0 0 0 0 1
## na, 12 13 15 17 17 17 14
## na) 1 1 1 1 1 1 1
## Docs
## Terms 7 8 9
## "agoraphobiaㅎㅍoutㅎhistoryㅎofㅎpanicㅎdisorder", 0 0 0
## "generalisedㅎanxietyㅎdisorder", 0 0 0
## "nonㅇclinicalㅎsampleㅎㅎ", 0 0 0
## "panicㅎdisorder", 0 0 0
## "posttraumaticㅎstressㅎdisorder", 0 0 0
## "socialㅎanxietyㅎdisorder", 0 0 0
## "specificㅎphobia", 0 0 0
## c("majorㅎdepressiveㅎdisorder", 0 0 0
## na, 13 16 15
## na) 1 1 1
# Names of the (up to) 30 most frequent terms.
# Collapse the document margin (MARGIN = 2) so that every term gets one total.
# MARGIN = 1 collapsed the terms instead and returned per-document totals, so
# the "top 30" was merely the first 30 rows of the matrix. slam:: is spelled
# out because data.table also exports a rollup().
word.count <- as.array(slam::rollup(tdm, 2))
# head() avoids NA indices when the matrix has fewer than 30 terms.
word.order <- head(order(word.count, decreasing = TRUE), 30)
freq.word <- word.order
row.names(tdm[freq.word, ])
## [1] "\"anxietyㅎdisorders\","
## [2] "\"depressiveㅎdisorders\","
## [3] "\"somatoformㅎdisorder\","
## [4] "c(\"depressiveㅎdisorders\","
## [5] "na)"
## [6] "na,"
## [7] "\"depressionㅎpast\","
## [8] "\"neverㅎdepressed\","
## [9] "c(\"currentㅎmajorㅎdepressiveㅎepisode\","
## [10] "c(\"eatingㅎdisordersㅎnotㅎotherwiseㅎspecified\","
## [11] "c(\"anxietyㅎdisorders\","
## [12] "c(\"nonㅇclinicalㅎsampleㅎㅎ\","
## [13] "\"nonㅇclinicalㅎsampleㅎㅎ\","
## [14] "\"panicㅎdisorder\","
## [15] "\"posttraumaticㅎstressㅎdisorder\","
## [16] "c(\"majorㅎdepressiveㅎdisorder\","
## [17] "\"anorexiaㅎnervosa\","
## [18] "\"avoidantㅇrestrictiveㅎfoodㅎintakeㅎdisorder\","
## [19] "\"bingeㅎeatingㅎdisorder\","
## [20] "\"bulimiaㅎnervosaㅎ\","
## [21] "c(\"otherwiseㅎspecifiedㅎfeedingㅎㅋㅎeatingㅎdisorders\","
## [22] "\"selfㅇreportedㅎproblemsㅎㅍㅎanxietyㅎㅋㅎdepression\","
## [23] "c(\"functionalㅎgastrointestinalㅎdisordersㅎ\","
## [24] "\"eatingㅎdisordersㅎnotㅎotherwiseㅎspecified\","
## [25] "c(\"anorexiaㅎnervosa\","
## [26] "\"aspergerㅎsyndrome\","
## [27] "\"autism\","
## [28] "\"paranoidㅎschizophrenia\","
## [29] "c(\"autism\","
## [30] "\"generalisedㅎanxietyㅎdisorder\","
# Total count of every term over all documents, largest first.
freq <- dtm %>%
  as.matrix() %>%
  colSums() %>%
  sort(decreasing = TRUE)
# Term / count table used by the plots.
wf <- data.frame(word = names(freq), freq = freq)
# Number of terms per total count.
table(freq)
## freq
## 1 2 3 4 5 6 8 9 36 505
## 57 12 7 2 3 2 2 1 1 1
# Print the whole table.
wf
## word
## na, na,
## na) na)
## "socialㅎanxietyㅎdisorder", "socialㅎanxietyㅎdisorder",
## "nonㅇclinicalㅎsampleㅎㅎ", "nonㅇclinicalㅎsampleㅎㅎ",
## "generalisedㅎanxietyㅎdisorder", "generalisedㅎanxietyㅎdisorder",
## "panicㅎdisorder", "panicㅎdisorder",
## "specificㅎphobia", "specificㅎphobia",
## "posttraumaticㅎstressㅎdisorder", "posttraumaticㅎstressㅎdisorder",
## c("majorㅎdepressiveㅎdisorder", c("majorㅎdepressiveㅎdisorder",
## "agoraphobiaㅎㅍoutㅎhistoryㅎofㅎpanicㅎdisorder", "agoraphobiaㅎㅍoutㅎhistoryㅎofㅎpanicㅎdisorder",
## "anxietyㅎdisorders", "anxietyㅎdisorders",
## "obsessiveㅇcompulsiveㅎdisorder", "obsessiveㅇcompulsiveㅎdisorder",
## "somatoformㅎdisorder", "somatoformㅎdisorder",
## c("nonㅇclinicalㅎsampleㅎㅎ", c("nonㅇclinicalㅎsampleㅎㅎ",
## "bulimiaㅎnervosaㅎ", "bulimiaㅎnervosaㅎ",
## "schizoaffectiveㅎdisorderㅎ", "schizoaffectiveㅎdisorderㅎ",
## "schizophreniaㅎ", "schizophreniaㅎ",
## c("panicㅎdisorder", c("panicㅎdisorder",
## "majorㅎdepressiveㅎdisorder", "majorㅎdepressiveㅎdisorder",
## c("depressiveㅎdisorders", c("depressiveㅎdisorders",
## c("anxietyㅎdisorders", c("anxietyㅎdisorders",
## "anorexiaㅎnervosa", "anorexiaㅎnervosa",
## "personalityㅎdisorders", "personalityㅎdisorders",
## c("generalisedㅎanxietyㅎdisorder", c("generalisedㅎanxietyㅎdisorder",
## "anxietyㅎdisorderㅎnotㅎotherwiseㅎspecified", "anxietyㅎdisorderㅎnotㅎotherwiseㅎspecified",
## c("socialㅎanxietyㅎdisorder", c("socialㅎanxietyㅎdisorder",
## "psychoticㅎdisorderㅎnotㅎotherwiseㅎspecified", "psychoticㅎdisorderㅎnotㅎotherwiseㅎspecified",
## c("bipolarㅎdisorderㅎㅍㅎpsychosis", c("bipolarㅎdisorderㅎㅍㅎpsychosis",
## "bipolarㅎiㅎdisorderㅎㅍㅎpsychoticㅎfeatures", "bipolarㅎiㅎdisorderㅎㅍㅎpsychoticㅎfeatures",
## "substanceㅎuseㅎdisorders", "substanceㅎuseㅎdisorders",
## c("posttraumaticㅎstressㅎdisorder", c("posttraumaticㅎstressㅎdisorder",
## "depressiveㅎdisorders", "depressiveㅎdisorders",
## "depressionㅎpast", "depressionㅎpast",
## "neverㅎdepressed", "neverㅎdepressed",
## c("currentㅎmajorㅎdepressiveㅎepisode", c("currentㅎmajorㅎdepressiveㅎepisode",
## c("eatingㅎdisordersㅎnotㅎotherwiseㅎspecified", c("eatingㅎdisordersㅎnotㅎotherwiseㅎspecified",
## "avoidantㅇrestrictiveㅎfoodㅎintakeㅎdisorder", "avoidantㅇrestrictiveㅎfoodㅎintakeㅎdisorder",
## "bingeㅎeatingㅎdisorder", "bingeㅎeatingㅎdisorder",
## c("otherwiseㅎspecifiedㅎfeedingㅎㅋㅎeatingㅎdisorders", c("otherwiseㅎspecifiedㅎfeedingㅎㅋㅎeatingㅎdisorders",
## "selfㅇreportedㅎproblemsㅎㅍㅎanxietyㅎㅋㅎdepression", "selfㅇreportedㅎproblemsㅎㅍㅎanxietyㅎㅋㅎdepression",
## c("functionalㅎgastrointestinalㅎdisordersㅎ", c("functionalㅎgastrointestinalㅎdisordersㅎ",
## "eatingㅎdisordersㅎnotㅎotherwiseㅎspecified", "eatingㅎdisordersㅎnotㅎotherwiseㅎspecified",
## c("anorexiaㅎnervosa", c("anorexiaㅎnervosa",
## "aspergerㅎsyndrome", "aspergerㅎsyndrome",
## "autism", "autism",
## "paranoidㅎschizophrenia", "paranoidㅎschizophrenia",
## c("autism", c("autism",
## "depersonalisationㅇderealisationㅎdisorder", "depersonalisationㅇderealisationㅎdisorder",
## "otherㅎdisorders", "otherㅎdisorders",
## c("affectiveㅎdisorders", c("affectiveㅎdisorders",
## c("auditoryㅎvocalㅎhallucinations", c("auditoryㅎvocalㅎhallucinations",
## "hypocondriasis", "hypocondriasis",
## "majorㅎdepressiveㅎdisorderㅎㅍㅎpsychosis", "majorㅎdepressiveㅎdisorderㅎㅍㅎpsychosis",
## c("schizophreniaㅎ", c("schizophreniaㅎ",
## "headache", "headache",
## c("specificㅎphobia", c("specificㅎphobia",
## "alcoholㅎabuse", "alcoholㅎabuse",
## "alcoholㅎdependence", "alcoholㅎdependence",
## "bipolarㅎiiㅎdisorder", "bipolarㅎiiㅎdisorder",
## "conductㅎdisorder", "conductㅎdisorder",
## "drugㅎabuse", "drugㅎabuse",
## "dysthymicㅎdisorder", "dysthymicㅎdisorder",
## "nicotineㅎdependence", "nicotineㅎdependence",
## "nonㅇclinicalㅎsampleㅎㅎ") "nonㅇclinicalㅎsampleㅎㅎ")
## "oppositionalㅎdefiantㅎdisorderㅎ", "oppositionalㅎdefiantㅎdisorderㅎ",
## c("bipolarㅎiㅎdisorderㅎ", c("bipolarㅎiㅎdisorderㅎ",
## "adjustmentㅎdisorders", "adjustmentㅎdisorders",
## "eatingㅎdisorders", "eatingㅎdisorders",
## "impulseㅎcontrolㅎdisorders", "impulseㅎcontrolㅎdisorders",
## "moodㅎdisorders", "moodㅎdisorders",
## "relationalㅎproblemsㅎㄱvㅇcodeㄴ", "relationalㅎproblemsㅎㄱvㅇcodeㄴ",
## c("depressiveㅎdisordersㅎ", c("depressiveㅎdisordersㅎ",
## "bipolarㅎdisorderㅎnotㅎotherwiseㅎspecified", "bipolarㅎdisorderㅎnotㅎotherwiseㅎspecified",
## "bipolarㅎiiㅎdisorderㅎㄱnonㅇpsychoticㄴ", "bipolarㅎiiㅎdisorderㅎㄱnonㅇpsychoticㄴ",
## "bipolarㅎiiㅎdisorderㅎㅍㅎpsychoticㅎfeatures", "bipolarㅎiiㅎdisorderㅎㅍㅎpsychoticㅎfeatures",
## "bipolarㅎiㅎdisorderㅎㄱnonㅇpsychoticㄴ", "bipolarㅎiㅎdisorderㅎㄱnonㅇpsychoticㄴ",
## "depressiveㅎdisorderㅎnotㅎotherwiseㅎspecified", "depressiveㅎdisorderㅎnotㅎotherwiseㅎspecified",
## "majorㅎdepressiveㅎdisorderㅎㅍㅎpsychoticㅎfeatures", "majorㅎdepressiveㅎdisorderㅎㅍㅎpsychoticㅎfeatures",
## "separationㅎanxietyㅎdisorder", "separationㅎanxietyㅎdisorder",
## c("firstㅎepisodeㅎpsychosis", c("firstㅎepisodeㅎpsychosis",
## "acuteㅎㅋㅎtransientㅎpsychoticㅎdisorders", "acuteㅎㅋㅎtransientㅎpsychoticㅎdisorders",
## "bipolarㅎmoodㅎdisorders", "bipolarㅎmoodㅎdisorders",
## "childhoodㄷadolescenceㅎonsetㅎdisorders", "childhoodㄷadolescenceㅎonsetㅎdisorders",
## "developmentalㅎdisorders", "developmentalㅎdisorders",
## "mentalㅎretardation", "mentalㅎretardation",
## "nonbipolarㅎmoodㅎdisorders", "nonbipolarㅎmoodㅎdisorders",
## "physiologicalㅎsyndromes", "physiologicalㅎsyndromes",
## c("clinicalㅎhighㅎriskㅎforㅎpsychosis", c("clinicalㅎhighㅎriskㅎforㅎpsychosis",
## freq
## na, 505
## na) 36
## "socialㅎanxietyㅎdisorder", 9
## "nonㅇclinicalㅎsampleㅎㅎ", 8
## "generalisedㅎanxietyㅎdisorder", 8
## "panicㅎdisorder", 6
## "specificㅎphobia", 6
## "posttraumaticㅎstressㅎdisorder", 5
## c("majorㅎdepressiveㅎdisorder", 5
## "agoraphobiaㅎㅍoutㅎhistoryㅎofㅎpanicㅎdisorder", 5
## "anxietyㅎdisorders", 4
## "obsessiveㅇcompulsiveㅎdisorder", 4
## "somatoformㅎdisorder", 3
## c("nonㅇclinicalㅎsampleㅎㅎ", 3
## "bulimiaㅎnervosaㅎ", 3
## "schizoaffectiveㅎdisorderㅎ", 3
## "schizophreniaㅎ", 3
## c("panicㅎdisorder", 3
## "majorㅎdepressiveㅎdisorder", 3
## c("depressiveㅎdisorders", 2
## c("anxietyㅎdisorders", 2
## "anorexiaㅎnervosa", 2
## "personalityㅎdisorders", 2
## c("generalisedㅎanxietyㅎdisorder", 2
## "anxietyㅎdisorderㅎnotㅎotherwiseㅎspecified", 2
## c("socialㅎanxietyㅎdisorder", 2
## "psychoticㅎdisorderㅎnotㅎotherwiseㅎspecified", 2
## c("bipolarㅎdisorderㅎㅍㅎpsychosis", 2
## "bipolarㅎiㅎdisorderㅎㅍㅎpsychoticㅎfeatures", 2
## "substanceㅎuseㅎdisorders", 2
## c("posttraumaticㅎstressㅎdisorder", 2
## "depressiveㅎdisorders", 1
## "depressionㅎpast", 1
## "neverㅎdepressed", 1
## c("currentㅎmajorㅎdepressiveㅎepisode", 1
## c("eatingㅎdisordersㅎnotㅎotherwiseㅎspecified", 1
## "avoidantㅇrestrictiveㅎfoodㅎintakeㅎdisorder", 1
## "bingeㅎeatingㅎdisorder", 1
## c("otherwiseㅎspecifiedㅎfeedingㅎㅋㅎeatingㅎdisorders", 1
## "selfㅇreportedㅎproblemsㅎㅍㅎanxietyㅎㅋㅎdepression", 1
## c("functionalㅎgastrointestinalㅎdisordersㅎ", 1
## "eatingㅎdisordersㅎnotㅎotherwiseㅎspecified", 1
## c("anorexiaㅎnervosa", 1
## "aspergerㅎsyndrome", 1
## "autism", 1
## "paranoidㅎschizophrenia", 1
## c("autism", 1
## "depersonalisationㅇderealisationㅎdisorder", 1
## "otherㅎdisorders", 1
## c("affectiveㅎdisorders", 1
## c("auditoryㅎvocalㅎhallucinations", 1
## "hypocondriasis", 1
## "majorㅎdepressiveㅎdisorderㅎㅍㅎpsychosis", 1
## c("schizophreniaㅎ", 1
## "headache", 1
## c("specificㅎphobia", 1
## "alcoholㅎabuse", 1
## "alcoholㅎdependence", 1
## "bipolarㅎiiㅎdisorder", 1
## "conductㅎdisorder", 1
## "drugㅎabuse", 1
## "dysthymicㅎdisorder", 1
## "nicotineㅎdependence", 1
## "nonㅇclinicalㅎsampleㅎㅎ") 1
## "oppositionalㅎdefiantㅎdisorderㅎ", 1
## c("bipolarㅎiㅎdisorderㅎ", 1
## "adjustmentㅎdisorders", 1
## "eatingㅎdisorders", 1
## "impulseㅎcontrolㅎdisorders", 1
## "moodㅎdisorders", 1
## "relationalㅎproblemsㅎㄱvㅇcodeㄴ", 1
## c("depressiveㅎdisordersㅎ", 1
## "bipolarㅎdisorderㅎnotㅎotherwiseㅎspecified", 1
## "bipolarㅎiiㅎdisorderㅎㄱnonㅇpsychoticㄴ", 1
## "bipolarㅎiiㅎdisorderㅎㅍㅎpsychoticㅎfeatures", 1
## "bipolarㅎiㅎdisorderㅎㄱnonㅇpsychoticㄴ", 1
## "depressiveㅎdisorderㅎnotㅎotherwiseㅎspecified", 1
## "majorㅎdepressiveㅎdisorderㅎㅍㅎpsychoticㅎfeatures", 1
## "separationㅎanxietyㅎdisorder", 1
## c("firstㅎepisodeㅎpsychosis", 1
## "acuteㅎㅋㅎtransientㅎpsychoticㅎdisorders", 1
## "bipolarㅎmoodㅎdisorders", 1
## "childhoodㄷadolescenceㅎonsetㅎdisorders", 1
## "developmentalㅎdisorders", 1
## "mentalㅎretardation", 1
## "nonbipolarㅎmoodㅎdisorders", 1
## "physiologicalㅎsyndromes", 1
## c("clinicalㅎhighㅎriskㅎforㅎpsychosis", 1
# Bar chart of the keywords that occur more than 3 times
# (x axis in the default order of `word`).
# The bar height has to be the count itself; order(freq) plotted the sort
# permutation of the filtered rows, not their frequencies.
p <- ggplot(subset(wf, freq > 3), aes(word, freq)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p
# Same keyword chart with the bars sorted by descending frequency.
p <- ggplot(
  data = subset(wf, freq > 3),
  mapping = aes(x = reorder(word, -freq), y = freq)
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
p
# Word Association
# List the terms whose correlation with the query term is at least `corlimit`
# (0.1 here; a correlation of 1 means the terms always appear together).
# NOTE(review): the recorded results are numeric(0) for both queries, so these
# query terms do not seem to occur in this matrix -- confirm they are intended.
findAssocs(dtm, terms = "학습공동체", corlimit = 0.1)
## $학습공동체
## numeric(0)
findAssocs(dtm, terms = "평생학습", corlimit = 0.1)
## $평생학습
## numeric(0)
# Reducing Terms
#dtmss1 <- removeSparseTerms(dtm, 0.999)
#dtmss2 <- removeSparseTerms(dtm, 0.998)
#dtmss3 <- removeSparseTerms(dtm, 0.997)
#dtmss4 <- removeSparseTerms(dtm, 0.996)
#dtmss5 <- removeSparseTerms(dtm, 0.995)
#dim(dtmss1)
#dim(dtmss2)
#dim(dtmss3)
#dim(dtmss4)
#dim(dtmss5)
#dtmss<-removeSparseTerms(dtm, 0.999)
# Basic Network with 0.999, 79 terms
#tdmss <- removeSparseTerms(tdm, 0.999)
#dtmss <- removeSparseTerms(dtm, 0.999)
# Sparse-term removal is switched off (the removeSparseTerms() calls are
# commented out), so the matrices used from here on are the full ones.
tdmss <- tdm
dtmss <- dtm
# Check the dimensions and print a summary of the document-term matrix.
dim(dtmss)
## [1] 37 88
dim(tdmss)
## [1] 88 37
dtmss
## <<DocumentTermMatrix (documents: 37, terms: 88)>>
## Non-/sparse entries: 232/3024
## Sparsity : 93%
## Maximal term length: 51
## Weighting : term frequency (tf)
# Recompute the sorted term totals and the term / count table from dtmss.
freq <- dtmss %>%
  as.matrix() %>%
  colSums() %>%
  sort(decreasing = TRUE)
wf <- data.frame(word = names(freq), freq = freq)
# Terms at the low end of the sorted totals.
tail(freq)
## "childhoodㄷadolescenceㅎonsetㅎdisorders",
## 1
## "developmentalㅎdisorders",
## 1
## "mentalㅎretardation",
## 1
## "nonbipolarㅎmoodㅎdisorders",
## 1
## "physiologicalㅎsyndromes",
## 1
## c("clinicalㅎhighㅎriskㅎforㅎpsychosis",
## 1
# Terms at the high end of the sorted totals.
head(freq)
## na, na)
## 505 36
## "socialㅎanxietyㅎdisorder", "nonㅇclinicalㅎsampleㅎㅎ",
## 9 8
## "generalisedㅎanxietyㅎdisorder", "panicㅎdisorder",
## 8 6
# Attach igraph for the network steps. library() stops with an error when the
# package is missing, whereas require() only warns and returns FALSE, which
# would let the script run on and fail later at the first igraph call.
library(igraph)
## Loading required package: igraph
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# Term-by-term matrix: the dense term-document matrix times its transpose, so
# entry (i, j) sums the products of the counts of terms i and j per document.
TDmatrix <- as.matrix(tdmss)
termMatrix <- TDmatrix %*% t(TDmatrix)
# Show rows and columns 5 to 10 of the result.
termMatrix[5:10, 5:10]
## Terms
## Terms na) na,
## na) 36 505
## na, 505 7327
## "depressionㅎpast", 1 15
## "neverㅎdepressed", 1 15
## c("currentㅎmajorㅎdepressiveㅎepisode", 1 15
## c("eatingㅎdisordersㅎnotㅎotherwiseㅎspecified", 1 17
## Terms
## Terms "depressionㅎpast",
## na) 1
## na, 15
## "depressionㅎpast", 1
## "neverㅎdepressed", 1
## c("currentㅎmajorㅎdepressiveㅎepisode", 1
## c("eatingㅎdisordersㅎnotㅎotherwiseㅎspecified", 0
## Terms
## Terms "neverㅎdepressed",
## na) 1
## na, 15
## "depressionㅎpast", 1
## "neverㅎdepressed", 1
## c("currentㅎmajorㅎdepressiveㅎepisode", 1
## c("eatingㅎdisordersㅎnotㅎotherwiseㅎspecified", 0
## Terms
## Terms c("currentㅎmajorㅎdepressiveㅎepisode",
## na) 1
## na, 15
## "depressionㅎpast", 1
## "neverㅎdepressed", 1
## c("currentㅎmajorㅎdepressiveㅎepisode", 1
## c("eatingㅎdisordersㅎnotㅎotherwiseㅎspecified", 0
## Terms
## Terms c("eatingㅎdisordersㅎnotㅎotherwiseㅎspecified",
## na) 1
## na, 17
## "depressionㅎpast", 0
## "neverㅎdepressed", 0
## c("currentㅎmajorㅎdepressiveㅎepisode", 0
## c("eatingㅎdisordersㅎnotㅎotherwiseㅎspecified", 1
# heatmap(termMatrix)
# Build an undirected, weighted graph from the term-by-term matrix.
g <- graph_from_adjacency_matrix(
  termMatrix,
  mode = "undirected",
  weighted = TRUE
)
# Drop self-loops and multiple edges. igraph:: is explicit because purrr also
# exports a simplify(), so the unqualified name depends on attach order.
g <- igraph::simplify(g)
# Per-term totals, named by term, in the column order of dtmss.
freq <- colSums(as.matrix(dtmss))
freq
## "anxietyㅎdisorders",
## 4
## "depressiveㅎdisorders",
## 1
## "somatoformㅎdisorder",
## 3
## c("depressiveㅎdisorders",
## 2
## na)
## 36
## na,
## 505
## "depressionㅎpast",
## 1
## "neverㅎdepressed",
## 1
## c("currentㅎmajorㅎdepressiveㅎepisode",
## 1
## c("eatingㅎdisordersㅎnotㅎotherwiseㅎspecified",
## 1
## c("anxietyㅎdisorders",
## 2
## c("nonㅇclinicalㅎsampleㅎㅎ",
## 3
## "nonㅇclinicalㅎsampleㅎㅎ",
## 8
## "panicㅎdisorder",
## 6
## "posttraumaticㅎstressㅎdisorder",
## 5
## c("majorㅎdepressiveㅎdisorder",
## 5
## "anorexiaㅎnervosa",
## 2
## "avoidantㅇrestrictiveㅎfoodㅎintakeㅎdisorder",
## 1
## "bingeㅎeatingㅎdisorder",
## 1
## "bulimiaㅎnervosaㅎ",
## 3
## c("otherwiseㅎspecifiedㅎfeedingㅎㅋㅎeatingㅎdisorders",
## 1
## "selfㅇreportedㅎproblemsㅎㅍㅎanxietyㅎㅋㅎdepression",
## 1
## c("functionalㅎgastrointestinalㅎdisordersㅎ",
## 1
## "eatingㅎdisordersㅎnotㅎotherwiseㅎspecified",
## 1
## c("anorexiaㅎnervosa",
## 1
## "aspergerㅎsyndrome",
## 1
## "autism",
## 1
## "paranoidㅎschizophrenia",
## 1
## c("autism",
## 1
## "generalisedㅎanxietyㅎdisorder",
## 8
## "depersonalisationㅇderealisationㅎdisorder",
## 1
## "otherㅎdisorders",
## 1
## "personalityㅎdisorders",
## 2
## c("affectiveㅎdisorders",
## 1
## c("auditoryㅎvocalㅎhallucinations",
## 1
## "socialㅎanxietyㅎdisorder",
## 9
## c("generalisedㅎanxietyㅎdisorder",
## 2
## "agoraphobiaㅎㅍoutㅎhistoryㅎofㅎpanicㅎdisorder",
## 5
## "anxietyㅎdisorderㅎnotㅎotherwiseㅎspecified",
## 2
## "obsessiveㅇcompulsiveㅎdisorder",
## 4
## "specificㅎphobia",
## 6
## "hypocondriasis",
## 1
## c("socialㅎanxietyㅎdisorder",
## 2
## "majorㅎdepressiveㅎdisorderㅎㅍㅎpsychosis",
## 1
## "psychoticㅎdisorderㅎnotㅎotherwiseㅎspecified",
## 2
## "schizoaffectiveㅎdisorderㅎ",
## 3
## "schizophreniaㅎ",
## 3
## c("bipolarㅎdisorderㅎㅍㅎpsychosis",
## 2
## "bipolarㅎiㅎdisorderㅎㅍㅎpsychoticㅎfeatures",
## 2
## c("schizophreniaㅎ",
## 1
## "headache",
## 1
## c("specificㅎphobia",
## 1
## c("panicㅎdisorder",
## 3
## "alcoholㅎabuse",
## 1
## "alcoholㅎdependence",
## 1
## "bipolarㅎiiㅎdisorder",
## 1
## "conductㅎdisorder",
## 1
## "drugㅎabuse",
## 1
## "dysthymicㅎdisorder",
## 1
## "majorㅎdepressiveㅎdisorder",
## 3
## "nicotineㅎdependence",
## 1
## "nonㅇclinicalㅎsampleㅎㅎ")
## 1
## "oppositionalㅎdefiantㅎdisorderㅎ",
## 1
## c("bipolarㅎiㅎdisorderㅎ",
## 1
## "adjustmentㅎdisorders",
## 1
## "eatingㅎdisorders",
## 1
## "impulseㅎcontrolㅎdisorders",
## 1
## "moodㅎdisorders",
## 1
## "relationalㅎproblemsㅎㄱvㅇcodeㄴ",
## 1
## "substanceㅎuseㅎdisorders",
## 2
## c("depressiveㅎdisordersㅎ",
## 1
## "bipolarㅎdisorderㅎnotㅎotherwiseㅎspecified",
## 1
## "bipolarㅎiiㅎdisorderㅎㄱnonㅇpsychoticㄴ",
## 1
## "bipolarㅎiiㅎdisorderㅎㅍㅎpsychoticㅎfeatures",
## 1
## "bipolarㅎiㅎdisorderㅎㄱnonㅇpsychoticㄴ",
## 1
## "depressiveㅎdisorderㅎnotㅎotherwiseㅎspecified",
## 1
## "majorㅎdepressiveㅎdisorderㅎㅍㅎpsychoticㅎfeatures",
## 1
## "separationㅎanxietyㅎdisorder",
## 1
## c("posttraumaticㅎstressㅎdisorder",
## 2
## c("firstㅎepisodeㅎpsychosis",
## 1
## "acuteㅎㅋㅎtransientㅎpsychoticㅎdisorders",
## 1
## "bipolarㅎmoodㅎdisorders",
## 1
## "childhoodㄷadolescenceㅎonsetㅎdisorders",
## 1
## "developmentalㅎdisorders",
## 1
## "mentalㅎretardation",
## 1
## "nonbipolarㅎmoodㅎdisorders",
## 1
## "physiologicalㅎsyndromes",
## 1
## c("clinicalㅎhighㅎriskㅎforㅎpsychosis",
## 1
# Vertex attributes: label, total term frequency and degree.
V(g)$label <- V(g)$name
# Look the totals up by vertex name instead of relying on the vertex order
# happening to match the column order of dtmss.
V(g)$freq <- freq[V(g)$name]
V(g)$degree <- degree(g)
# V(g)$size <- 2
# grep("Nanum", fonts(), value = TRUE)
# Draw the network without vertex labels using a Fruchterman-Reingold layout.
plot(
  g,
  family = "AppleMyungjo",
  vertex.label = NA,
  layout = layout_with_fr,
  edge.arrow.size = 0
)
# write_graph(g, "g.gml", "gml")