기말 프로젝트

자유민주주의 지도자 루즈벨트 대통령과 독재자 히틀러의 연설문 비교

데이터 준비

# Set the CRAN mirror used by any later install.packages() call.
options(repos = c(CRAN = "https://cran.rstudio.com"))

# Absolute Windows paths to the two speech transcripts.
file_path1 <- "C:\\Users\\chosun\\Desktop\\R\\roosevelt speech script.txt"
file_path2 <- "C:\\Users\\chosun\\Desktop\\R\\hit speech script.txt"

# Read every line of each transcript and lower-case it. warn = FALSE silences
# the "incomplete final line" warning for files without a trailing newline.
text1 <- readLines(file_path1, warn = FALSE)
text1 <- tolower(text1)
text2 <- readLines(file_path2, warn = FALSE)
text2 <- tolower(text2)

# Collapse each transcript into one space-separated string and delete every
# punctuation character (the lines are already lower case at this point).
roo_text <- gsub("[[:punct:]]", "", paste(text1, collapse = " "))
hit_text <- gsub("[[:punct:]]", "", paste(text2, collapse = " "))

# Tokenise on runs of whitespace and discard empty tokens, e.g. the one
# produced when the text starts with whitespace.
roo_words <- strsplit(roo_text, "\\s+")[[1]]
roo_words <- roo_words[nzchar(roo_words)]

hit_words <- strsplit(hit_text, "\\s+")[[1]]
hit_words <- hit_words[nzchar(hit_words)]

# English stop words excluded from the frequency counts. Entries are grouped by
# initial letter; each group starts with the bare letter itself. The list ends
# with a few archaic pronouns.
# NOTE: as a variable this shares its name with base::stop(); calls such as
# stop("msg") still resolve to the function.
stop <- c(
  # a
  "a", "about", "above", "across", "after", "again", "against", "all",
  "almost", "alone", "along", "already", "also", "although", "always", "am",
  "among", "an", "and", "another", "any", "anybody", "anyone", "anything",
  "anywhere", "are", "area", "areas", "aren't", "around", "as", "ask",
  "asked", "asking", "asks", "at", "away",
  # b
  "b", "back", "backed", "backing", "backs", "be", "became", "because",
  "become", "becomes", "been", "before", "began", "behind", "being",
  "beings", "below", "best", "better", "between", "big", "both", "but", "by",
  # c
  "c", "came", "can", "cannot", "can't", "case", "cases", "certain",
  "certainly", "clear", "clearly", "come", "could", "couldn't",
  # d
  "d", "did", "didn't", "differ", "different", "differently", "do", "does",
  "doesn't", "doing", "done", "don't", "down", "downed", "downing", "downs",
  "during",
  # e
  "e", "each", "early", "either", "end", "ended", "ending", "ends", "enough",
  "even", "evenly", "ever", "every", "everybody", "everyone", "everything",
  "everywhere",
  # f
  "f", "face", "faces", "fact", "facts", "far", "felt", "few", "find",
  "finds", "first", "for", "four", "from", "full", "fully", "further",
  "furthered", "furthering", "furthers",
  # g
  "g", "gave", "general", "generally", "get", "gets", "give", "given",
  "gives", "go", "going", "good", "goods", "got", "great", "greater",
  "greatest", "group", "grouped", "grouping", "groups",
  # h
  "h", "had", "hadn't", "has", "hasn't", "have", "haven't", "having", "he",
  "he'd", "he'll", "her", "here", "here's", "hers", "herself", "he's", "high",
  "higher", "highest", "him", "himself", "his", "how", "however", "how's",
  # i
  "i", "i'd", "if", "i'll", "i'm", "important", "in", "interest",
  "interested", "interesting", "interests", "into", "is", "isn't", "it",
  "its", "it's", "itself", "i've",
  # j, k
  "j", "just",
  "k", "keep", "keeps", "kind", "knew", "know", "known", "knows",
  # l
  "l", "large", "largely", "last", "later", "latest", "least", "less", "let",
  "lets", "let's", "like", "likely", "long", "longer", "longest",
  # m
  "m", "made", "make", "making", "man", "many", "may", "me", "member",
  "members", "men", "might", "more", "most", "mostly", "mr", "mrs", "much",
  "must", "mustn't", "my", "myself",
  # n
  "n", "necessary", "need", "needed", "needing", "needs", "never", "new",
  "newer", "newest", "next", "no", "nobody", "non", "noone", "nor", "not",
  "nothing", "now", "nowhere", "number", "numbers",
  # o
  "o", "of", "off", "often", "old", "older", "oldest", "on", "once", "one",
  "only", "open", "opened", "opening", "opens", "or", "order", "ordered",
  "ordering", "orders", "other", "others", "ought", "our", "ours",
  "ourselves", "out", "over", "own",
  # p
  "p", "part", "parted", "parting", "parts", "per", "perhaps", "place",
  "places", "point", "pointed", "pointing", "points", "possible", "present",
  "presented", "presenting", "presents", "problem", "problems", "put", "puts",
  # q, r
  "q", "quite",
  "r", "rather", "really", "right", "room", "rooms",
  # s
  "s", "said", "same", "saw", "say", "says", "second", "seconds", "see",
  "seem", "seemed", "seeming", "seems", "sees", "several", "shall", "shan't",
  "she", "she'd", "she'll", "she's", "should", "shouldn't", "show", "showed",
  "showing", "shows", "side", "sides", "since", "small", "smaller",
  "smallest", "so", "some", "somebody", "someone", "something", "somewhere",
  "state", "states", "still", "such", "sure",
  # t
  "t", "take", "taken", "than", "that", "that's", "the", "their", "theirs",
  "them", "themselves", "then", "there", "therefore", "there's", "these",
  "they", "they'd", "they'll", "they're", "they've", "thing", "things",
  "think", "thinks", "this", "those", "though", "thought", "thoughts",
  "three", "through", "thus", "to", "today", "together", "too", "took",
  "toward", "turn", "turned", "turning", "turns", "two",
  # u, v
  "u", "under", "until", "up", "upon", "us", "use", "used", "uses",
  "v", "very",
  # w
  "w", "want", "wanted", "wanting", "wants", "was", "wasn't", "way", "ways",
  "we", "we'd", "well", "we'll", "wells", "went", "were", "we're", "weren't",
  "we've", "what", "what's", "when", "when's", "where", "where's", "whether",
  "which", "while", "who", "whole", "whom", "who's", "whose", "why", "why's",
  "will", "with", "within", "without", "won't", "work", "worked", "working",
  "works", "would", "wouldn't",
  # x, y, z
  "x",
  "y", "year", "years", "yes", "yet", "you", "you'd", "you'll", "young",
  "younger", "youngest", "your", "you're", "yours", "yourself", "yourselves",
  "you've",
  "z",
  # archaic pronouns / prepositions
  "unto", "thou", "thy", "thee"
)

단어 빈도표 만들기

# Remove stop words from the Roosevelt tokens, then count the remaining words.
# The tokens had every punctuation character deleted when the text was cleaned,
# so a contraction arrives as "dont" rather than "don't". Matching against a
# punctuation-free copy of the stop list as well lets those entries take effect.
roo_words <- roo_words[
  !roo_words %in% union(stop, gsub("[[:punct:]]", "", stop))
]
word_freq1 <- table(roo_words)
sorted_word_freq1 <- sort(word_freq1, decreasing = TRUE)
# head() stops at the end of the table, so a text with fewer than 20 distinct
# words prints no <NA> padding (which `[1:20]` would produce).
print(head(sorted_word_freq1, 20))

## roo_words
##   national     people     action     helped leadership      world      money 
##         10          8          7          7          7          6          5 
##     nation   congress       days discipline       duty    efforts  emergency 
##          5          4          4          4          4          4          4 
##       hand   measures     public   respects       task       time 
##          4          4          4          4          4          4

# Remove stop words from the Hitler tokens, then count the remaining words.
# The tokens had every punctuation character deleted when the text was cleaned,
# so a contraction arrives as "dont" rather than "don't". Matching against a
# punctuation-free copy of the stop list as well lets those entries take effect.
hit_words <- hit_words[
  !hit_words %in% union(stop, gsub("[[:punct:]]", "", stop))
]
word_freq2 <- table(hit_words)
sorted_word_freq2 <- sort(word_freq2, decreasing = TRUE)
# head() stops at the end of the table, so a text with fewer than 20 distinct
# words prints no <NA> padding (which `[1:20]` would produce).
print(head(sorted_word_freq2, 20))

## hit_words
##     german  proposals     danzig    germany government   peaceful     poland 
##         13         13          7          7          6          6          6 
##     polish    germans      reich  revisions      claim   corridor     demand 
##          6          5          5          5          4          4          4 
##    assured     border       city    finally impossible      issue 
##          3          3          3          3          3          3

단어 빈도 막대 그래프 그리기

library(ggplot2)
library(dplyr)

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(RColorBrewer)
# Turn each sorted frequency table into a data frame (word column + `Freq`),
# keep the 15 most frequent words, and record those words in ascending
# frequency so they can be used as the axis order of the bar charts.
# The word column is named after the tabulated vector (`roo_words` /
# `hit_words`); it is referenced by its full name instead of relying on the
# partial matching that `$` performs on data frames.
sorted_word_freq_df1 <- as.data.frame(sorted_word_freq1,
                                      stringsAsFactors = FALSE)
roo_top15 <- sorted_word_freq_df1 %>%
  arrange(desc(Freq)) %>%
  head(15)
order1 <- arrange(roo_top15, Freq)$roo_words

sorted_word_freq_df2 <- as.data.frame(sorted_word_freq2,
                                      stringsAsFactors = FALSE)
hit_top15 <- sorted_word_freq_df2 %>%
  arrange(desc(Freq)) %>%
  head(15)
order2 <- arrange(hit_top15, Freq)$hit_words

# Horizontal bar chart of the 15 most frequent words in Roosevelt's speech.
# `order1` fixes the order of the bars on the (flipped) word axis; the argument
# is spelled `limits` in full rather than relying on partial argument matching.
ggplot(data = roo_top15, aes(x = roo_words, y = Freq)) +
  ylim(0, max(roo_top15$Freq)) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(limits = order1)

# Horizontal bar chart of the 15 most frequent words in Hitler's speech.
# `order2` fixes the order of the bars on the (flipped) word axis; the argument
# is spelled `limits` in full rather than relying on partial argument matching.
ggplot(data = hit_top15, aes(x = hit_words, y = Freq)) +
  ylim(0, max(hit_top15$Freq)) +
  geom_col() +
  coord_flip() +
  scale_x_discrete(limits = order2)

워드 클라우드 만들기

library(RColorBrewer)
# Install wordcloud only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("wordcloud", quietly = TRUE)) {
  install.packages("wordcloud")
}

## 'C:/Users/chosun/AppData/Local/R/win-library/4.3'의 위치에 패키지(들)을 설치합니다.
## (왜냐하면 'lib'가 지정되지 않았기 때문입니다)

## 패키지 'wordcloud'를 성공적으로 압축해제하였고 MD5 sums 이 확인되었습니다
## 
## 다운로드된 바이너리 패키지들은 다음의 위치에 있습니다
##  C:\Users\chosun\AppData\Local\Temp\Rtmp6fL6SN\downloaded_packages

library(wordcloud)
# Eight-colour "Dark2" palette used for the word clouds.
pal <- brewer.pal(8, "Dark2")
# Fix the RNG seed so the cloud layout is reproducible between runs.
set.seed(1234)
# Word cloud for Roosevelt's speech: words seen at least twice, at most 130 of
# them, placed in decreasing order of frequency. The word column is referenced
# by its full name (`roo_words`) instead of relying on `$` partial matching.
wordcloud(words = sorted_word_freq_df1$roo_words,
          freq = sorted_word_freq_df1$Freq,
          min.freq = 2,
          max.words = 130,
          random.order = FALSE,
          rot.per = .2,
          scale = c(2, 0.3),
          colors = pal)

# Word cloud for Hitler's speech: words seen at least twice, at most 130 of
# them, placed in decreasing order of frequency. The word column is referenced
# by its full name (`hit_words`) instead of relying on `$` partial matching.
wordcloud(words = sorted_word_freq_df2$hit_words,
          freq = sorted_word_freq_df2$Freq,
          min.freq = 2,
          max.words = 130,
          random.order = FALSE,
          rot.per = .1,
          scale = c(2.5, 0.3),
          colors = pal)

위 데이터는 프랭클린 델라노 루즈벨트 대통령의 첫 취임사 연설문과 히틀러의 독일 제국의회(라이히스타크) 연설문입니다. 각각 당시 미국과 독일의 상황에 대한 리더십과 정책을 담고 있습니다.

루즈벨트는 대공황으로 인한 어려움 속에서 국가를 통합하고 경제를 회복하기 위한 계획을 제시하며, 여러 측면에서 메시지를 전달하고자 다양한 단어와 표현을 사용한 것으로 보입니다. 특히 “people”이라는 단어를 자주 사용하여 대중과의 소통과 협력을 강조하고 있습니다.

한편 히틀러는 간결하고 명확한 메시지를 전하고자 적은 수의 단어를 반복해서 사용하며, “german”이라는 단어를 통해 독일에 대한 열정을 드러내고 독일인에게 초점을 맞추어 이야기하고 있습니다.

루즈벨트는 대중과의 소통과 협력을 중시하며 다양한 측면에서 문제에 접근하고자 하고, 히틀러는 독일에 대한 강한 애정을 바탕으로 독일인에게 초점을 맞춘 간결한 메시지를 전하고자 한다는 차이가 나타납니다.