First, load the required packages.
Second, import the data. We have two datasets for German speakers,
one from Germany and another from Austria.
# German politicians' Twitter accounts (Germany).
# The file is read as tab-delimited text despite its .csv extension.
twitter_Germany <- read_delim(
  file = "twitterDuitsePoliticiAccount.csv",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Rows: 284198 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (8): id, type, author, text, sender, url, keywords, mentions
## date (1): datePublished
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View(twitter_Germany)
# Austrian politicians' Twitter accounts (Austria).
# The file is read as tab-delimited text despite its .csv extension.
twitter_Austria <- read_delim(
  file = "twitterOostenrijksePoliticiAccount.csv",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Rows: 188936 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (8): id, type, author, text, sender, url, keywords, mentions
## date (1): datePublished
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View(twitter_Austria)
Third, select the study period and the required
variables.
# Keep only tweets published inside the study period, 2022-01-01 through
# 2022-04-03 (both ends inclusive). The bounds are explicit Date values so the
# comparison does not rely on implicit character-to-Date coercion, and the end
# of the window is enforced instead of being assumed from the data's extent.
tweets_Germany <- filter(
  twitter_Germany,
  datePublished >= as.Date("2022-01-01"),
  datePublished <= as.Date("2022-04-03")
)
tweets_Austria <- filter(
  twitter_Austria,
  datePublished >= as.Date("2022-01-01"),
  datePublished <= as.Date("2022-04-03")
)

# Keep only the variables used in the rest of the analysis
tweets_Germany <- select(
  tweets_Germany,
  author, text, sender, datePublished, keywords, mentions
)
tweets_Austria <- select(
  tweets_Austria,
  author, text, sender, datePublished, keywords, mentions
)
Plotting the number of UKRAINE hashtags per day
# Sum the UKRAINE hashtag counts per publication date, one series per country
Agg_hashtags_g <- aggregate(
  Count_g ~ tweets_Germany.datePublished,
  data = hashtags_Germany,
  FUN = sum
)
Agg_hashtags_a <- aggregate(
  Count_a ~ tweets_Austria.datePublished,
  data = hashtags_Austria,
  FUN = sum
)

# Columns are addressed by their full names (Count_g / Count_a); `$Count`
# only resolved through partial matching. The axis limits span both series so
# the Austrian line is not clipped to the range of the German data.
plot(
  Agg_hashtags_g[["tweets_Germany.datePublished"]],
  Agg_hashtags_g[["Count_g"]],
  type = "l",
  xlim = range(c(
    Agg_hashtags_g[["tweets_Germany.datePublished"]],
    Agg_hashtags_a[["tweets_Austria.datePublished"]]
  )),
  ylim = range(c(
    Agg_hashtags_g[["Count_g"]],
    Agg_hashtags_a[["Count_a"]]
  )),
  xlab = "Date",
  ylab = "Number of hashtaging UKRAINE"
)
lines(
  Agg_hashtags_a[["tweets_Austria.datePublished"]],
  Agg_hashtags_a[["Count_a"]],
  col = "red",
  type = "l"
)
legend(
  "topleft",
  legend = c("Germany", "Austria"),
  col = c("Black", "Red"),
  lty = 1,
  cex = 0.8
)
# Word cloud
# Germany
# Split the comma-separated `keywords` field into individual hashtags:
# normalise whitespace, upper-case, drop tweets without keywords, split on
# commas and trim each resulting hashtag.
hashtags_freq_g <- tweets_Germany$keywords %>%
  str_squish() %>%
  toupper() %>%
  na.omit() %>%
  str_split(",") %>%
  unlist() %>%
  str_squish()

# Each hashtag becomes one document; row sums of the term-document matrix are
# therefore the hashtag frequencies.
# NOTE(review): TermDocumentMatrix() with default control appears to lower-case
# terms and drop terms shorter than 3 characters, so two-letter hashtags would
# be excluded - confirm this is intended.
docs <- Corpus(VectorSource(hashtags_freq_g))
dtm <- TermDocumentMatrix(docs)
# Named `term_matrix` rather than `matrix` so base::matrix() is not masked
term_matrix <- as.matrix(dtm)
words <- sort(rowSums(term_matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)
set.seed(1234) # for reproducibility
wordcloud(
  words = df$word,
  freq = df$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# head() returns only rows that exist; df[1:30, ] would pad the table with
# NA rows whenever fewer than 30 distinct terms are present.
kable(head(df, 30), caption = "Germany")
Germany

|                   | word              | freq |
|:------------------|:------------------|-----:|
| ukraine           | ukraine           |  624 |
| afd               | afd               |  303 |
| bundestag         | bundestag         |  205 |
| impfpflicht       | impfpflicht       |  194 |
| corona            | corona            |  188 |
| ampel             | ampel             |  166 |
| russland          | russland          |  134 |
| putin             | putin             |  126 |
| bundesversammlung | bundesversammlung |  118 |
| bundesregierung   | bundesregierung   |  107 |
| teamcdu           | teamcdu           |  104 |
| 3k22              | 3k22              |   92 |
| cdupt22           | cdupt22           |   91 |
| steinmeier        | steinmeier        |   85 |
| aufinsneue        | aufinsneue        |   82 |
| cdu               | cdu               |   81 |
| deutschland       | deutschland       |   80 |
| bundespräsident   | bundespräsident   |   78 |
| saarland          | saarland          |   78 |
| europa            | europa            |   65 |
| dbdk22            | dbdk22            |   64 |
| freiheit          | freiheit          |   63 |
| bundeswehr        | bundeswehr        |   60 |
| omikron           | omikron           |   59 |
| spd               | spd               |   57 |
| habeck            | habeck            |   55 |
| inflation         | inflation         |   54 |
| bayern            | bayern            |   51 |
| scholz            | scholz            |   51 |
| standwithukraine  | standwithukraine  |   48 |
# Keep the 15 most frequent German hashtags before `df` is overwritten by the
# Austrian table below. head() avoids the NA rows df[1:15, ] would add when
# fewer than 15 terms exist.
df1 <- head(df, 15)
# Austria
# Split the comma-separated `keywords` field into individual hashtags:
# normalise whitespace, upper-case, drop tweets without keywords, split on
# commas and trim each resulting hashtag.
hashtags_freq_a <- tweets_Austria$keywords %>%
  str_squish() %>%
  toupper() %>%
  na.omit() %>%
  str_split(",") %>%
  unlist() %>%
  str_squish()

# Each hashtag becomes one document; row sums of the term-document matrix are
# therefore the hashtag frequencies.
# NOTE(review): TermDocumentMatrix() with default control appears to lower-case
# terms and drop terms shorter than 3 characters, so two-letter hashtags would
# be excluded - confirm this is intended.
docs <- Corpus(VectorSource(hashtags_freq_a))
dtm <- TermDocumentMatrix(docs)
# Named `term_matrix` rather than `matrix` so base::matrix() is not masked
term_matrix <- as.matrix(dtm)
words <- sort(rowSums(term_matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)
set.seed(1234) # for reproducibility
wordcloud(
  words = df$word,
  freq = df$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# head() returns only rows that exist; df[1:30, ] would pad the table with
# NA rows whenever fewer than 30 distinct terms are present.
kable(head(df, 30), caption = "Austria")
Austria

|                   | word              | freq |
|:------------------|:------------------|-----:|
| ukraine           | ukraine           |  147 |
| bmichats          | bmichats          |  114 |
| övp               | övp               |   91 |
| sobotka           | sobotka           |   79 |
| oevpua            | oevpua            |   58 |
| zib2              | zib2              |   44 |
| oenr              | oenr              |   44 |
| rotesfoyer        | rotesfoyer        |   40 |
| oevpkorruptionsua | oevpkorruptionsua |   37 |
| putin             | putin             |   36 |
| longcovid         | longcovid         |   31 |
| oevp              | oevp              |   28 |
| russland          | russland          |   27 |
| breaking          | breaking          |   23 |
| covid19           | covid19           |   22 |
| omikron           | omikron           |   22 |
| standwithukraine  | standwithukraine  |   20 |
| kloibmüller       | kloibmüller       |   20 |
| imzentrum         | imzentrum         |   19 |
| wksta             | wksta             |   19 |
| einland           | einland           |   17 |
| hessenthaler      | hessenthaler      |   17 |
| yeswecare         | yeswecare         |   16 |
| covid             | covid             |   16 |
| covid19at         | covid19at         |   15 |
| neutralität       | neutralität       |   15 |
| sideletter        | sideletter        |   15 |
| nehammer          | nehammer          |   13 |
| weremember        | weremember        |   13 |
| wolf              | wolf              |   12 |
# Keep the 15 most frequent Austrian hashtags. head() avoids the NA rows
# df[1:15, ] would add when fewer than 15 terms exist.
df2 <- head(df, 15)

# Draw the two bar charts side by side; par() returns the previous settings,
# which are restored afterwards so later plots are not forced into a 1x2 grid.
old_par <- par(mfrow = c(1, 2))
barplot(
  df1$freq,
  names.arg = df1$word,
  las = 2, # axis labels perpendicular to the axis
  col = 2,
  ylab = "Word frequency",
  xlab = "Hashtags",
  main = "Germany"
)
barplot(
  df2$freq,
  names.arg = df2$word,
  las = 2,
  col = 3,
  ylab = "Word frequency",
  xlab = "Hashtags",
  main = "Austria"
)
par(old_par)

Latent Dirichlet Allocation (LDA)
# Germany
# Convert `ger` to the topicmodels input format and fit a 10-topic LDA model
# with the seed fixed at 123.
# NOTE(review): `1 / (1:10)` is the length-10 vector 1, 1/2, ..., 1/10 (the
# same value as the unparenthesised `1/1:10`, since `:` binds tighter than
# `/`) - confirm that a vector-valued alpha is what LDA() is meant to receive.
lda_g <- LDA(
  convert(ger, to = "topicmodels"),
  k = 10,
  control = list(seed = 123, alpha = 1 / (1:10))
)
# Show the first ten terms listed for each topic
terms(lda_g, 10)
## Topic 1 Topic 2 Topic 3 Topic 4 Topic 5
## [1,] "bayern" "herzlichen" "dass" "unsere" "leben"
## [2,] "zukunft" "glückwunsch" "scholz" "heute" "frauen"
## [3,] "minister" "land" "olaf" "opfer" "freiheit"
## [4,] "ja" "unsere" "ganze" "müssen" "dass"
## [5,] "danke" "frankwalter" "entscheidung" "menschen" "immer"
## [6,] "gemeinsam" "liebe" "bundeskanzler" "jahre" "unsere"
## [7,] "fortschritt" "dass" "robert" "tag" "kinder"
## [8,] "ukraine" "milliarden" "bedeutet" "jahren" "macht"
## [9,] "deutschland" "dank" "deutschen" "dass" "debatte"
## [10,] "berlin" "zeiten" "deutschland" "vergessen" "heute"
## Topic 6 Topic 7 Topic 8 Topic 9 Topic 10
## [1,] "krieg" "heute" "müssen" "uhr" "mehr"
## [2,] "ukraine" "gute" "bm" "ab" "menschen"
## [3,] "presseinfo" "mehr" "geht" "live" "müssen"
## [4,] "europa" "wahl" "dass" "brauchen" "fordert"
## [5,] "menschen" "dass" "brauchen" "mehr" "bundesfinanzminister"
## [6,] "stehen" "abend" "menschen" "heute" "dass"
## [7,] "dass" "danke" "sagt" "dabei" "maßnahmen"
## [8,] "heute" "spd" "darum" "statt" "euro"
## [9,] "putin" "neuen" "mehr" "beim" "braucht"
## [10,] "russland" "neue" "deutschland" "geht" "bürger"
# Austria
# Convert `aus` to the topicmodels input format and fit a 10-topic LDA model
# with the seed fixed at 123.
# NOTE(review): `1 / (1:10)` is the length-10 vector 1, 1/2, ..., 1/10 (the
# same value as the unparenthesised `1/1:10`, since `:` binds tighter than
# `/`) - confirm that a vector-valued alpha is what LDA() is meant to receive.
lda_a <- LDA(
  convert(aus, to = "topicmodels"),
  k = 10,
  control = list(seed = 123, alpha = 1 / (1:10))
)
# Show the first ten terms listed for each topic
terms(lda_a, 10)
## Topic 1 Topic 2 Topic 3 Topic 4 Topic 5
## [1,] "heute" "heute" "danke" "dass" "menschen"
## [2,] "dass" "geht" "regierung" "menschen" "österreich"
## [3,] "immer" "mehr" "dass" "europa" "mehr"
## [4,] "nehammer" "sobotka" "via" "unsere" "dass"
## [5,] "menschen" "wien" "ukraine" "ukraine" "regierung"
## [6,] "regierung" "euro" "heute" "österreich" "geht"
## [7,] "övp" "österreich" "wurde" "demokratie" "tun"
## [8,] "wurde" "teil" "gerade" "krieg" "impfpflicht"
## [9,] "tag" "h" "gute" "angriff" "müssen"
## [10,] "d" "wolfgang" "övp" "gibt" "ja"
## Topic 6 Topic 7 Topic 8 Topic 9 Topic 10
## [1,] "interview" "dass" "dass" "russian" "menschen"
## [2,] "abhängigkeit" "ukraine" "heute" "ukraine" "regierung"
## [3,] "jahren" "heute" "unsere" "russia" "viele"
## [4,] "dass" "sanktionen" "mehr" "people" "dass"
## [5,] "regierung" "mehr" "övp" "kyiv" "immer"
## [6,] "sagt" "nie" "russland" "ukrainian" "mehr"
## [7,] "seit" "sicherheit" "österreich" "us" "övp"
## [8,] "russischem" "wirklich" "schon" "breaking" "österreich"
## [9,] "dr" "pandemie" "ganz" "now" "müssen"
## [10,] "wäre" "österreich" "chats" "new" "seit"