기말 프로젝트

자유민주적인 루즈벨트 대통령과 독재자 히틀러 연설문 비교

데이터 준비

# Data preparation: read both speech transcripts and turn each into a
# vector of lower-case word tokens with punctuation removed.

# Set the CRAN mirror that install.packages() will use.
options(repos = c(CRAN = "https://cran.rstudio.com"))

# Absolute paths to the two transcripts (specific to the author's machine).
file_path1 <- "C:\\Users\\chosun\\Desktop\\R\\roosevelt speech script.txt"
file_path2 <- "C:\\Users\\chosun\\Desktop\\R\\hit speech script.txt"

# Join the lines of a transcript into one string and delete punctuation.
# The lines are expected to be lower-cased already, so no case folding here.
collapse_speech <- function(lines) {
  gsub("[[:punct:]]", "", paste(lines, collapse = " "))
}

# Split a cleaned string on runs of whitespace and drop empty tokens.
split_into_words <- function(text) {
  words <- unlist(strsplit(text, "\\s+"))
  words[words != ""]
}

# Lower-case once, at read time; warn = FALSE silences the warning about a
# missing final newline.
text1 <- tolower(readLines(file_path1, warn = FALSE))
text2 <- tolower(readLines(file_path2, warn = FALSE))

roo_text <- collapse_speech(text1)
hit_text <- collapse_speech(text2)

roo_words <- split_into_words(roo_text)
hit_words <- split_into_words(hit_text)

# English stop words excluded from the frequency counts, grouped by initial
# letter, followed by a few archaic pronouns.
# Note: this variable shares its name with base::stop(); calls to stop()
# still resolve to the function, but the name is kept only because later
# code refers to it.
stop <- c(
  # a
  "a", "about", "above", "across", "after", "again", "against", "all",
  "almost", "alone", "along", "already", "also", "although", "always", "am",
  "among", "an", "and", "another", "any", "anybody", "anyone", "anything",
  "anywhere", "are", "area", "areas", "aren't", "around", "as", "ask",
  "asked", "asking", "asks", "at", "away",
  # b
  "b", "back", "backed", "backing", "backs", "be", "became", "because",
  "become", "becomes", "been", "before", "began", "behind", "being",
  "beings", "below", "best", "better", "between", "big", "both", "but", "by",
  # c
  "c", "came", "can", "cannot", "can't", "case", "cases", "certain",
  "certainly", "clear", "clearly", "come", "could", "couldn't",
  # d
  "d", "did", "didn't", "differ", "different", "differently", "do", "does",
  "doesn't", "doing", "done", "don't", "down", "downed", "downing", "downs",
  "during",
  # e
  "e", "each", "early", "either", "end", "ended", "ending", "ends", "enough",
  "even", "evenly", "ever", "every", "everybody", "everyone", "everything",
  "everywhere",
  # f
  "f", "face", "faces", "fact", "facts", "far", "felt", "few", "find",
  "finds", "first", "for", "four", "from", "full", "fully", "further",
  "furthered", "furthering", "furthers",
  # g
  "g", "gave", "general", "generally", "get", "gets", "give", "given",
  "gives", "go", "going", "good", "goods", "got", "great", "greater",
  "greatest", "group", "grouped", "grouping", "groups",
  # h
  "h", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he",
  "he'd", "he'll", "her", "here", "here's", "hers", "herself", "he's",
  "high", "higher", "highest", "him", "himself", "his", "how", "however",
  "how's",
  # i
  "i", "i'd", "if", "i'll", "i'm", "important", "in", "interest",
  "interested", "interesting", "interests", "into", "is", "isn't", "it",
  "its", "it's", "itself", "i've",
  # j
  "j", "just",
  # k
  "k", "keep", "keeps", "kind", "knew", "know", "known", "knows",
  # l
  "l", "large", "largely", "last", "later", "latest", "least", "less", "let",
  "lets", "let's", "like", "likely", "long", "longer", "longest",
  # m
  "m", "made", "make", "making", "man", "many", "may", "me", "member",
  "members", "men", "might", "more", "most", "mostly", "mr", "mrs", "much",
  "must", "mustn't", "my", "myself",
  # n
  "n", "necessary", "need", "needed", "needing", "needs", "never", "new",
  "newer", "newest", "next", "no", "nobody", "non", "noone", "nor", "not",
  "nothing", "now", "nowhere", "number", "numbers",
  # o
  "o", "of", "off", "often", "old", "older", "oldest", "on", "once", "one",
  "only", "open", "opened", "opening", "opens", "or", "order", "ordered",
  "ordering", "orders", "other", "others", "ought", "our", "ours",
  "ourselves", "out", "over", "own",
  # p
  "p", "part", "parted", "parting", "parts", "per", "perhaps", "place",
  "places", "point", "pointed", "pointing", "points", "possible", "present",
  "presented", "presenting", "presents", "problem", "problems", "put",
  "puts",
  # q
  "q", "quite",
  # r
  "r", "rather", "really", "right", "room", "rooms",
  # s
  "s", "said", "same", "saw", "say", "says", "second", "seconds", "see",
  "seem", "seemed", "seeming", "seems", "sees", "several", "shall", "shan't",
  "she", "she'd", "she'll", "she's", "should", "shouldn't", "show", "showed",
  "showing", "shows", "side", "sides", "since", "small", "smaller",
  "smallest", "so", "some", "somebody", "someone", "something", "somewhere",
  "state", "states", "still", "such", "sure",
  # t
  "t", "take", "taken", "than", "that", "that's", "the", "their", "theirs",
  "them", "themselves", "then", "there", "therefore", "there's", "these",
  "they", "they'd", "they'll", "they're", "they've", "thing", "things",
  "think", "thinks", "this", "those", "though", "thought", "thoughts",
  "three", "through", "thus", "to", "today", "together", "too", "took",
  "toward", "turn", "turned", "turning", "turns", "two",
  # u
  "u", "under", "until", "up", "upon", "us", "use", "used", "uses",
  # v
  "v", "very",
  # w
  "w", "want", "wanted", "wanting", "wants", "was", "wasn't", "way", "ways",
  "we", "we'd", "well", "we'll", "wells", "went", "were", "we're", "weren't",
  "we've", "what", "what's", "when", "when's", "where", "where's", "whether",
  "which", "while", "who", "whole", "whom", "who's", "whose", "why", "why's",
  "will", "with", "within", "without", "won't", "work", "worked", "working",
  "works", "would", "wouldn't",
  # x
  "x",
  # y
  "y", "year", "years", "yes", "yet", "you", "you'd", "you'll", "young",
  "younger", "youngest", "your", "you're", "yours", "yourself", "yourselves",
  "you've",
  # z
  "z",
  # archaic pronouns / prepositions
  "unto", "thou", "thy", "thee"
)

단어 빈도표 만들기

# Remove stop words from the Roosevelt tokens and tabulate what is left.
# Punctuation was stripped from the speech text before tokenising, so a
# contraction such as "don't" appears there as "dont". Match against the
# stop list both as written and with punctuation stripped the same way.
stop_stripped <- gsub("[[:punct:]]", "", stop)
roo_words <- roo_words[!roo_words %in% c(stop, stop_stripped)]
word_freq1 <- table(roo_words)
sorted_word_freq1 <- sort(word_freq1, decreasing = TRUE)
# head() returns at most 20 entries and, unlike [1:20], does not pad with NA
# when fewer than 20 distinct words remain.
print(head(sorted_word_freq1, 20))

## roo_words
##   national     people     action     helped leadership      world      money 
##         10          8          7          7          7          6          5 
##     nation   congress       days discipline       duty    efforts  emergency 
##          5          4          4          4          4          4          4 
##       hand   measures     public   respects       task       time 
##          4          4          4          4          4          4

# Remove stop words from the Hitler tokens and tabulate what is left.
# Punctuation was stripped from the speech text before tokenising, so a
# contraction such as "don't" appears there as "dont". Match against the
# stop list both as written and with punctuation stripped the same way.
stop_stripped <- gsub("[[:punct:]]", "", stop)
hit_words <- hit_words[!hit_words %in% c(stop, stop_stripped)]
word_freq2 <- table(hit_words)
sorted_word_freq2 <- sort(word_freq2, decreasing = TRUE)
# head() returns at most 20 entries and, unlike [1:20], does not pad with NA
# when fewer than 20 distinct words remain.
print(head(sorted_word_freq2, 20))

## hit_words
##     german  proposals     danzig    germany government   peaceful     poland 
##         13         13          7          7          6          6          6 
##     polish    germans      reich  revisions      claim   corridor     demand 
##          6          5          5          5          4          4          4 
##    assured     border       city    finally impossible      issue 
##          3          3          3          3          3          3

단어 빈도 막대 그래프 그리기

library(ggplot2)
library(dplyr)

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(RColorBrewer)
# Horizontal bar charts of the 15 most frequent words in each speech.
# as.data.frame() on the frequency table yields a word column named after
# the tabulated vector ("roo_words" / "hit_words") plus a "Freq" column.
# Columns are extracted by their full names rather than relying on `$`
# partial matching.
sorted_word_freq_df1 <- as.data.frame(sorted_word_freq1,
                                      stringsAsFactors = FALSE)
roo_top15 <- sorted_word_freq_df1 %>%
  arrange(desc(Freq)) %>%
  head(15)
# Ascending order, so that after coord_flip() the most frequent word is on top.
order1 <- arrange(roo_top15, Freq)[["roo_words"]]

sorted_word_freq_df2 <- as.data.frame(sorted_word_freq2,
                                      stringsAsFactors = FALSE)
hit_top15 <- sorted_word_freq_df2 %>%
  arrange(desc(Freq)) %>%
  head(15)
order2 <- arrange(hit_top15, Freq)[["hit_words"]]

ggplot(data = roo_top15, aes(x = roo_words, y = Freq)) +
  ylim(0, max(roo_top15$Freq)) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(limits = order1)

ggplot(data = hit_top15, aes(x = hit_words, y = Freq)) +
  ylim(0, max(hit_top15$Freq)) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(limits = order2)

워드 클라우드 만들기

library(RColorBrewer)
# Install wordcloud only when it is not already available. Reinstalling it
# unconditionally on every run is unnecessary, and the rendered log shows
# such a reinstall failing with "Permission denied" and being rolled back.
if (!requireNamespace("wordcloud", quietly = TRUE)) {
  install.packages("wordcloud")
}

## 'C:/Users/chosun/AppData/Local/R/win-library/4.3'의 위치에 패키지(들)을 설치합니다.
## (왜냐하면 'lib'가 지정되지 않았기 때문입니다)

## 패키지 'wordcloud'를 성공적으로 압축해제하였고 MD5 sums 이 확인되었습니다

## Warning: 패키지 'wordcloud'의 이전설치를 삭제할 수 없습니다

## Warning in file.copy(savedcopy, lib, recursive = TRUE):
## C:\Users\chosun\AppData\Local\R\win-library\4.3\00LOCK\wordcloud\libs\x64\wordcloud.dll를
## C:\Users\chosun\AppData\Local\R\win-library\4.3\wordcloud\libs\x64\wordcloud.dll로
## 복사하는데 문제가 발생했습니다: Permission denied

## Warning: 'wordcloud'를 복구하였습니다

## 
## 다운로드된 바이너리 패키지들은 다음의 위치에 있습니다
##  C:\Users\chosun\AppData\Local\Temp\RtmpCmsIdr\downloaded_packages

library(wordcloud)
# Word clouds for both speeches, sharing one 8-colour "Dark2" palette.
pal <- brewer.pal(8, "Dark2")
# Seed the RNG before drawing so the layout is reproducible.
set.seed(1234)

# Roosevelt: words occurring at least twice, at most 130 words, most
# frequent words placed first (random.order = FALSE).
# Word columns are referenced by their full names ("roo_words" /
# "hit_words") rather than relying on `$` partial matching.
wordcloud(words = sorted_word_freq_df1[["roo_words"]],
          freq = sorted_word_freq_df1[["Freq"]],
          min.freq = 2,
          max.words = 130,
          random.order = FALSE,
          rot.per = .2,
          scale = c(2, 0.3),
          colors = pal)

# Hitler: same settings except for rot.per and scale.
wordcloud(words = sorted_word_freq_df2[["hit_words"]],
          freq = sorted_word_freq_df2[["Freq"]],
          min.freq = 2,
          max.words = 130,
          random.order = FALSE,
          rot.per = .1,
          scale = c(2.5, 0.3),
          colors = pal)

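나온 결과물을 보면, 루즈벨트 연설문에서는 national, people, action 등의 단어가, 히틀러 연설문에서는 german, proposals, danzig 등의 단어가 가장 자주 쓰였음을 알 수 있다.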

기말 프로젝트

2023-12-13

자유민주적인 루즈벨트 대통령과 독재자 히틀러 연설문 비교

데이터 준비

단어 빈도표 만들기

단어 빈도 막대 그래프 그리기

워드 클라우드 만들기