First, load the required packages.

Second, import the data. We have two datasets for German speakers, one from Germany and another from Austria.

# Tweets from German politicians' accounts (Germany); tab-separated file
twitter_Germany <- read_delim(
  file = "twitterDuitsePoliticiAccount.csv",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Rows: 284198 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (8): id, type, author, text, sender, url, keywords, mentions
## date (1): datePublished
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View(twitter_Germany)

# Tweets from Austrian politicians' accounts (Austria); tab-separated file
twitter_Austria <- read_delim(
  file = "twitterOostenrijksePoliticiAccount.csv",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Rows: 188936 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr  (8): id, type, author, text, sender, url, keywords, mentions
## date (1): datePublished
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View(twitter_Austria)

Third, select the time period of the study and the required variables.

# Select tweets published from 2022-01-01 to 2022-04-03 (both bounds inclusive).
# The bounds are real Date objects so the comparison never depends on implicit
# character-to-Date coercion, and the end of the study window is enforced.
study_start <- as.Date("2022-01-01")
study_end <- as.Date("2022-04-03")

tweets_Germany <- filter(twitter_Germany,
                  datePublished >= study_start,
                  datePublished <= study_end)

tweets_Austria <- filter(twitter_Austria,
                  datePublished >= study_start,
                  datePublished <= study_end)

# Keep only the variables used in the rest of the analysis
tweets_Germany <- select(tweets_Germany, author, text, sender, datePublished, keywords, mentions)
tweets_Austria <- select(tweets_Austria, author, text, sender, datePublished, keywords, mentions)

The tweet writers (Sender)

# Normalise sender names (squished whitespace, upper case) and drop missing ones
Sender_freq_g <- tweets_Germany$sender %>%
  str_squish() %>%
  toupper() %>%
  na.omit() %>%
  unlist() %>%
  str_squish()

# Number of tweets per sender, most active sender first
Senders <- data.frame(
  sort(table(Sender_freq_g), decreasing = TRUE)
)

df <- with(Senders, data.frame(Sender = Sender_freq_g, Freq = Freq))
kable(df, caption = "Germany")
Germany
Sender Freq
SPD PARTEIVORSTAND 🇪🇺 1697
CDU/CSU 1156
FDP 1008
JOANA COTAR 665
CSU 623
CEM ÖZDEMIR 587
CDU DEUTSCHLANDS 540
DIE LINKE 520
ALTERNATIVE FÜR 🇩🇪 DEUTSCHLAND 449
MARKUS SÖDER 407
CHRISTIAN LINDNER 359
KATRIN GÖRING-ECKARDT 299
BÜNDNIS 90/DIE GRÜNEN 276
JANINE WISSLER 236
DIETMAR BARTSCH 188
SUSANNE HENNIG-WELLSOW 114
AUßENMINISTERIN ANNALENA BAERBOCK 98
SAHRA WAGENKNECHT 82
ALICE WEIDEL 79
AMIRA MOHAMED ALI 71
OLAF SCHOLZ 71
ARMIN LASCHET 68
TINO CHRUPALLA 18
MARTIN SCHULZ 12
# Normalise sender names (squished whitespace, upper case) and drop missing ones
Sender_freq_a <- tweets_Austria$sender %>%
  str_squish() %>%
  toupper() %>%
  na.omit() %>%
  unlist() %>%
  str_squish()

# Number of tweets per sender, most active sender first
Senders <- data.frame(
  sort(table(Sender_freq_a), decreasing = TRUE)
)

df <- with(Senders, data.frame(Sender = Sender_freq_a, Freq = Freq))
kable(df, caption = "Austria")
Austria
Sender Freq
RUDI ANSCHOBER 2277
PETER PILZ 1353
DAS NEUE ÖSTERREICH 560
BEATE MEINL-REISINGER 522
SPÖ 496
FPÖ 282
MATTHIAS STROLZ 276
DIE GRÜNEN 236
WERNER KOGLER 199
HAGEN REINHOLD, MDB 143
NORBERT HOFER 62
PAMELA RENDI-WAGNER 36
SEBASTIAN KURZ 17
MANFRED HAIMBUCHNER 13

Plot how frequently Ukraine was hashtagged on Twitter.

# Flag which hashtag slots of each tweet mention Ukraine and count them.
#
# keywords:  character vector of comma-separated hashtags, one element per tweet.
# dates:     publication dates, parallel to `keywords`.
# date_col:  name of the date column in the result.
# count_col: name of the per-tweet count column in the result.
# n_slots:   number of hashtag slots. Everything after the (n_slots - 1)-th
#            comma stays together in the last slot, so that remainder can add
#            at most 1 to the count.
# pattern:   regular expression matched against the upper-cased, squished slot.
#
# Returns a tibble with logical columns X1..X<n_slots> (TRUE where the slot
# matches `pattern`), the date column, and the integer count column.
count_ukraine_hashtags <- function(keywords, dates, date_col, count_col,
                                   n_slots = 10, pattern = "UKRAIN.*") {
  slots <- str_split_fixed(keywords, ",", n_slots)
  flags <- str_detect(toupper(str_squish(slots)), pattern)
  # The string helpers work element-wise on the flattened matrix; restore the
  # tweet-by-slot layout afterwards.
  dim(flags) <- dim(slots)

  out <- data.frame(flags, dates)
  names(out) <- c(paste0("X", seq_len(n_slots)), date_col)
  # Summing logicals counts the TRUE slots; keep the integer type
  out[[count_col]] <- as.integer(rowSums(flags))
  tibble(out)
}

hashtags_Germany <- count_ukraine_hashtags(
  tweets_Germany$keywords,
  tweets_Germany$datePublished,
  date_col = "tweets_Germany.datePublished",
  count_col = "Count_g"
)
########################
hashtags_Austria <- count_ukraine_hashtags(
  tweets_Austria$keywords,
  tweets_Austria$datePublished,
  date_col = "tweets_Austria.datePublished",
  count_col = "Count_a"
)

Plotting the number of hashtags mentioning UKRAINE

# Daily totals of Ukraine-related hashtags for each country
Agg_hashtags_g <- aggregate(Count_g ~ tweets_Germany.datePublished, data = hashtags_Germany, FUN = sum)
Agg_hashtags_a <- aggregate(Count_a ~ tweets_Austria.datePublished, data = hashtags_Austria, FUN = sum)

# Axis limits span both series so the Austrian line cannot be clipped by
# limits derived from the German data alone.
x_range <- range(Agg_hashtags_g$tweets_Germany.datePublished,
                 Agg_hashtags_a$tweets_Austria.datePublished)
y_range <- range(Agg_hashtags_g$Count_g, Agg_hashtags_a$Count_a)

# Columns are referenced by their full names (Count_g / Count_a) instead of
# relying on `$` partial matching.
plot(Agg_hashtags_g$tweets_Germany.datePublished, Agg_hashtags_g$Count_g, type = "l",
     xlim = x_range, ylim = y_range,
     xlab = "Date", ylab = "Number of hashtaging UKRAINE")
lines(Agg_hashtags_a$tweets_Austria.datePublished, Agg_hashtags_a$Count_a, col = "red", type = "l")
legend("topleft", legend=c("Germany", "Austria"),
       col=c("Black", "Red"), lty=1, cex=0.8)

# Word cloud

# Germany
# One upper-cased, whitespace-squished hashtag per element; tweets without
# keywords are dropped before splitting on commas.
hashtags_freq_g <- tweets_Germany$keywords %>%
  str_squish() %>%
  toupper() %>%
  na.omit() %>%
  str_split(",") %>%
  unlist() %>%
  str_squish()

# Hashtag frequencies via a term-document matrix (each hashtag is a document).
# NOTE(review): TermDocumentMatrix applies its default term processing, so very
# short hashtags may be dropped - confirm this is intended.
docs <- Corpus(VectorSource(hashtags_freq_g))
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)

set.seed(1234) # for reproducibility
wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words = 200,
          random.order = FALSE, rot.per = 0.35,
          colors = brewer.pal(8, "Dark2"))

# head() does not pad with NA rows when fewer than 30 hashtags exist; the row
# names only repeat the `word` column, so they are left out of the table.
kable(head(df, 30), caption = "Germany", row.names = FALSE)
Germany
word freq
ukraine ukraine 624
afd afd 303
bundestag bundestag 205
impfpflicht impfpflicht 194
corona corona 188
ampel ampel 166
russland russland 134
putin putin 126
bundesversammlung bundesversammlung 118
bundesregierung bundesregierung 107
teamcdu teamcdu 104
3k22 3k22 92
cdupt22 cdupt22 91
steinmeier steinmeier 85
aufinsneue aufinsneue 82
cdu cdu 81
deutschland deutschland 80
bundespräsident bundespräsident 78
saarland saarland 78
europa europa 65
dbdk22 dbdk22 64
freiheit freiheit 63
bundeswehr bundeswehr 60
omikron omikron 59
spd spd 57
habeck habeck 55
inflation inflation 54
bayern bayern 51
scholz scholz 51
standwithukraine standwithukraine 48
# Keep the 15 most frequent German hashtags for the comparison barplot
# (head() does not pad with NA rows when fewer than 15 exist)
df1 <- head(df, 15)


# Austria
# One upper-cased, whitespace-squished hashtag per element; tweets without
# keywords are dropped before splitting on commas.
hashtags_freq_a <- tweets_Austria$keywords %>%
  str_squish() %>%
  toupper() %>%
  na.omit() %>%
  str_split(",") %>%
  unlist() %>%
  str_squish()

# Hashtag frequencies via a term-document matrix (each hashtag is a document).
# NOTE(review): TermDocumentMatrix applies its default term processing, so very
# short hashtags may be dropped - confirm this is intended.
docs <- Corpus(VectorSource(hashtags_freq_a))
dtm <- TermDocumentMatrix(docs)
matrix <- as.matrix(dtm)
words <- sort(rowSums(matrix), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)

set.seed(1234) # for reproducibility
wordcloud(words = df$word, freq = df$freq, min.freq = 1, max.words = 200,
          random.order = FALSE, rot.per = 0.35,
          colors = brewer.pal(8, "Dark2"))

# head() does not pad with NA rows when fewer than 30 hashtags exist; the row
# names only repeat the `word` column, so they are left out of the table.
kable(head(df, 30), caption = "Austria", row.names = FALSE)
Austria
word freq
ukraine ukraine 147
bmichats bmichats 114
övp övp 91
sobotka sobotka 79
oevpua oevpua 58
zib2 zib2 44
oenr oenr 44
rotesfoyer rotesfoyer 40
oevpkorruptionsua oevpkorruptionsua 37
putin putin 36
longcovid longcovid 31
oevp oevp 28
russland russland 27
breaking breaking 23
covid19 covid19 22
omikron omikron 22
standwithukraine standwithukraine 20
kloibmüller kloibmüller 20
imzentrum imzentrum 19
wksta wksta 19
einland einland 17
hessenthaler hessenthaler 17
yeswecare yeswecare 16
covid covid 16
covid19at covid19at 15
neutralität neutralität 15
sideletter sideletter 15
nehammer nehammer 13
weremember weremember 13
wolf wolf 12
# Keep the 15 most frequent Austrian hashtags
# (head() does not pad with NA rows when fewer than 15 exist)
df2 <- head(df, 15)

# Draw both countries side by side, then restore the previous graphics
# settings so later plots are not forced into the two-panel layout.
old_par <- par(mfrow = c(1, 2))
barplot(df1$freq, names.arg = df1$word, las = 2, col = 2, ylab = "Word frequency", xlab = "Hashtags", main = "Germany")

barplot(df2$freq, names.arg = df2$word, las = 2, col = 3, ylab = "Word frequency", xlab = "Hashtags", main = "Austria")
par(old_par)

Tweets analysis

# Normalise raw tweet text for bag-of-words analysis.
#
# Removes hashtags, links and @-mentions, the HTML entities "&amp;", "&lt;"
# and "&gt;", punctuation and digits. Entities and line breaks are replaced by
# a space so that neighbouring words are not glued together, and only the
# entity itself is removed (a bare "amp" pattern would also eat the middle of
# ordinary words). The result is whitespace-squished and upper-cased.
#
# text: character vector of raw tweets.
# Returns the cleaned character vector with missing tweets dropped.
clean_tweet_text <- function(text) {
  text <- gsub("#\\S*", "", text)
  text <- gsub("http\\S*", "", text)
  text <- gsub("@\\S*", "", text)
  text <- gsub("&(amp|lt|gt);", " ", text)
  text <- gsub("[\r\n]+", " ", text)
  text <- gsub("[[:punct:]]", "", text)
  text <- gsub("\\d", "", text)
  na.omit(toupper(str_squish(text)))
}

# Build a document-feature matrix from cleaned tweets, without German and
# English stop words.
#
# text: character vector of cleaned tweets.
# Returns the object produced by the dfm() / dfm_remove() pipeline.
build_tweet_dfm <- function(text) {
  corpus(text) %>%
    tokens(remove_punct = TRUE) %>%
    dfm() %>%
    dfm_remove(stopwords("german")) %>%
    dfm_remove(stopwords("english"))
}

# Create a vector containing only the text, then clean it
Text_g <- clean_tweet_text(tweets_Germany$text)
Text_a <- clean_tweet_text(tweets_Austria$text)

# Germany
ger <- build_tweet_dfm(Text_g)
textplot_wordcloud(ger, max_words = 200)

# Austria
aus <- build_tweet_dfm(Text_a)
textplot_wordcloud(aus, max_words = 200)

# Germany: tokens occurring more than 200 times in the cleaned tweets
words <- sort(colSums(ger), decreasing = TRUE)
df <- subset(data.frame(word = names(words), freq = words), freq > 200)
with(df, barplot(freq, names.arg = word, las = 2, col = 2, main = "Germany"))

# Austria: tokens occurring more than 200 times in the cleaned tweets
words <- sort(colSums(aus), decreasing = TRUE)
df <- subset(data.frame(word = names(words), freq = words), freq > 200)
with(df, barplot(freq, names.arg = word, las = 2, col = 2, main = "Austria"))

# Sentiment Analysis

# Germany: NRC scores for every cleaned tweet, using the German lexicon
tg <- iconv(x = Text_g)
s1 <- get_nrc_sentiment(char_v = tg, language = "german")
## Warning: `spread_()` was deprecated in tidyr 1.2.0.
## Please use `spread()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# Total of each NRC score column over all German tweets
barplot(colSums(s1),
        las = 2,
        col = rainbow(10),
        ylab = 'Count',
        main = 'Sentiment Scores Tweets')

# Per-tweet sentiment values. The `language` argument is only honoured by the
# "nrc" method; with method = "syuzhet" it is ignored and the German tweets
# would be scored against the English lexicon.
values_g <- get_sentiment(Text_g, method = "nrc", language = "german")
simple_plot(values_g)

# Austria: NRC scores for every cleaned tweet, using the German lexicon
ta <- iconv(Text_a)
s2 <- get_nrc_sentiment(ta, language = "german")

# Total of each NRC score column over all Austrian tweets
barplot(colSums(s2),
        las = 2,
        col = rainbow(10),
        ylab = 'Count',
        main = 'Sentiment Scores Tweets')

# Per-tweet sentiment values. The `language` argument is only honoured by the
# "nrc" method; with method = "syuzhet" it is ignored and the German tweets
# would be scored against the English lexicon.
values_a <- get_sentiment(Text_a, method = "nrc", language = "german")
simple_plot(values_a)

Latent Dirichlet Allocation (LDA)

# Germany: fit a 10-topic LDA model on the tweet dfm and list the 10 top terms
# of every topic.
# NOTE(review): `1/1:10` evaluates to the vector 1, 1/2, ..., 1/10; confirm
# that a length-10 alpha is what the LDA control is meant to receive.
lda_g <- LDA(
  convert(ger, to = "topicmodels"),
  k = 10,
  control = list(seed = 123, alpha = 1/1:10)
)
terms(lda_g, 10)
##       Topic 1       Topic 2       Topic 3         Topic 4     Topic 5   
##  [1,] "bayern"      "herzlichen"  "dass"          "unsere"    "leben"   
##  [2,] "zukunft"     "glückwunsch" "scholz"        "heute"     "frauen"  
##  [3,] "minister"    "land"        "olaf"          "opfer"     "freiheit"
##  [4,] "ja"          "unsere"      "ganze"         "müssen"    "dass"    
##  [5,] "danke"       "frankwalter" "entscheidung"  "menschen"  "immer"   
##  [6,] "gemeinsam"   "liebe"       "bundeskanzler" "jahre"     "unsere"  
##  [7,] "fortschritt" "dass"        "robert"        "tag"       "kinder"  
##  [8,] "ukraine"     "milliarden"  "bedeutet"      "jahren"    "macht"   
##  [9,] "deutschland" "dank"        "deutschen"     "dass"      "debatte" 
## [10,] "berlin"      "zeiten"      "deutschland"   "vergessen" "heute"   
##       Topic 6      Topic 7 Topic 8       Topic 9    Topic 10              
##  [1,] "krieg"      "heute" "müssen"      "uhr"      "mehr"                
##  [2,] "ukraine"    "gute"  "bm"          "ab"       "menschen"            
##  [3,] "presseinfo" "mehr"  "geht"        "live"     "müssen"              
##  [4,] "europa"     "wahl"  "dass"        "brauchen" "fordert"             
##  [5,] "menschen"   "dass"  "brauchen"    "mehr"     "bundesfinanzminister"
##  [6,] "stehen"     "abend" "menschen"    "heute"    "dass"                
##  [7,] "dass"       "danke" "sagt"        "dabei"    "maßnahmen"           
##  [8,] "heute"      "spd"   "darum"       "statt"    "euro"                
##  [9,] "putin"      "neuen" "mehr"        "beim"     "braucht"             
## [10,] "russland"   "neue"  "deutschland" "geht"     "bürger"
# Austria: fit a 10-topic LDA model on the tweet dfm and list the 10 top terms
# of every topic.
# NOTE(review): `1/1:10` evaluates to the vector 1, 1/2, ..., 1/10; confirm
# that a length-10 alpha is what the LDA control is meant to receive.
lda_a <- LDA(
  convert(aus, to = "topicmodels"),
  k = 10,
  control = list(seed = 123, alpha = 1/1:10)
)
terms(lda_a, 10)
##       Topic 1     Topic 2      Topic 3     Topic 4      Topic 5      
##  [1,] "heute"     "heute"      "danke"     "dass"       "menschen"   
##  [2,] "dass"      "geht"       "regierung" "menschen"   "österreich" 
##  [3,] "immer"     "mehr"       "dass"      "europa"     "mehr"       
##  [4,] "nehammer"  "sobotka"    "via"       "unsere"     "dass"       
##  [5,] "menschen"  "wien"       "ukraine"   "ukraine"    "regierung"  
##  [6,] "regierung" "euro"       "heute"     "österreich" "geht"       
##  [7,] "övp"       "österreich" "wurde"     "demokratie" "tun"        
##  [8,] "wurde"     "teil"       "gerade"    "krieg"      "impfpflicht"
##  [9,] "tag"       "h"          "gute"      "angriff"    "müssen"     
## [10,] "d"         "wolfgang"   "övp"       "gibt"       "ja"         
##       Topic 6        Topic 7      Topic 8      Topic 9     Topic 10    
##  [1,] "interview"    "dass"       "dass"       "russian"   "menschen"  
##  [2,] "abhängigkeit" "ukraine"    "heute"      "ukraine"   "regierung" 
##  [3,] "jahren"       "heute"      "unsere"     "russia"    "viele"     
##  [4,] "dass"         "sanktionen" "mehr"       "people"    "dass"      
##  [5,] "regierung"    "mehr"       "övp"        "kyiv"      "immer"     
##  [6,] "sagt"         "nie"        "russland"   "ukrainian" "mehr"      
##  [7,] "seit"         "sicherheit" "österreich" "us"        "övp"       
##  [8,] "russischem"   "wirklich"   "schon"      "breaking"  "österreich"
##  [9,] "dr"           "pandemie"   "ganz"       "now"       "müssen"    
## [10,] "wäre"         "österreich" "chats"      "new"       "seit"